ai-zero-token 2.0.6 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +19 -1
  2. package/README.md +5 -5
  3. package/README.zh-CN.md +5 -5
  4. package/admin-ui/dist/assets/accounts-p9bqmijS.js +4 -0
  5. package/admin-ui/dist/assets/{docs--eK_2fzC.js → docs-BQaF_ZMr.js} +1 -1
  6. package/admin-ui/dist/assets/{image-bed-7wBZ1GhS.js → image-bed-D4w1m7k6.js} +1 -1
  7. package/admin-ui/dist/assets/index-BRQrU_AA.css +1 -0
  8. package/admin-ui/dist/assets/{index-CdFYy5j6.js → index-_5Ny0cZf.js} +3 -3
  9. package/admin-ui/dist/assets/{launch-BiD1Khtg.js → launch-BEDxgkQf.js} +1 -1
  10. package/admin-ui/dist/assets/{logs-BdoKDqh2.js → logs-BcL0n0Ld.js} +1 -1
  11. package/admin-ui/dist/assets/{network-detect-BvKns5nQ.js → network-detect-lEfklmIy.js} +1 -1
  12. package/admin-ui/dist/assets/{overview-wm6M45fu.js → overview-DsUMffIU.js} +1 -1
  13. package/admin-ui/dist/assets/{profiles-DMOjJORP.js → profiles-C5SmQvju.js} +1 -1
  14. package/admin-ui/dist/assets/settings-a3HxExcC.js +8 -0
  15. package/admin-ui/dist/assets/{tester-NrARmlis.js → tester-Ca4JOgAq.js} +1 -1
  16. package/admin-ui/dist/assets/usage-hMH0gMZ5.js +1 -0
  17. package/admin-ui/dist/index.html +3 -3
  18. package/dist/cli/commands/help.js +1 -1
  19. package/dist/cli/commands/models.js +3 -2
  20. package/dist/core/context.js +1 -1
  21. package/dist/core/models/openai-codex-models.js +106 -1
  22. package/dist/core/providers/http-client.js +142 -12
  23. package/dist/core/providers/openai-codex/chat.js +139 -8
  24. package/dist/core/services/auth-service.js +104 -7
  25. package/dist/core/services/chat-service.js +16 -18
  26. package/dist/core/services/model-service.js +22 -8
  27. package/dist/core/services/usage-service.js +402 -31
  28. package/dist/core/store/codex-auth-store.js +82 -7
  29. package/dist/server/app.js +410 -34
  30. package/dist/server/index.js +1 -1
  31. package/docs/API_USAGE.md +1 -1
  32. package/docs/DESKTOP_RELEASE.md +12 -1
  33. package/package.json +1 -1
  34. package/admin-ui/dist/assets/accounts-bCDKXGg9.js +0 -4
  35. package/admin-ui/dist/assets/index-C22_3Mxq.css +0 -1
  36. package/admin-ui/dist/assets/settings-DOOu7Kd8.js +0 -5
  37. package/admin-ui/dist/assets/usage-CdWRVMDV.js +0 -1
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { randomUUID } from "node:crypto";
3
3
  import fs from "node:fs/promises";
4
+ import { networkInterfaces } from "node:os";
4
5
  import path from "node:path";
5
6
  import { Readable } from "node:stream";
6
7
  import { promisify } from "node:util";
@@ -15,13 +16,18 @@ import Fastify from "fastify";
15
16
  import cors from "@fastify/cors";
16
17
  import { z } from "zod";
17
18
  import { createGatewayContext } from "../core/context.js";
18
- import { requestText } from "../core/providers/http-client.js";
19
+ import { isTransientHttpError, requestText } from "../core/providers/http-client.js";
19
20
  import { streamOpenAICodex } from "../core/providers/openai-codex/chat.js";
20
21
  import { generateChatGPTWebImage } from "../core/providers/openai-codex/chatgpt-web-image.js";
21
22
  const packageRoot = path.dirname(fileURLToPath(new URL("../../package.json", import.meta.url)));
22
23
  const adminUiDistDir = path.join(packageRoot, "admin-ui", "dist");
23
24
  const adminUiIndexPath = path.join(adminUiDistDir, "index.html");
25
+ const BYTES_PER_MIB = 1024 * 1024;
24
26
  const MAX_GATEWAY_REQUEST_LOGS = 100;
27
+ const MAX_CODEX_RESPONSE_PROFILE_BINDINGS = 5e3;
28
+ const CODEX_STREAM_DRAIN_AFTER_CLIENT_CLOSE_MS = 3e4;
29
+ const DEFAULT_ROUTE_BODY_LIMIT_BYTES = 128 * BYTES_PER_MIB;
30
+ const CODEX_COMPACT_BODY_LIMIT_BYTES = 256 * BYTES_PER_MIB;
25
31
  const gunzipAsync = promisify(gunzip);
26
32
  const inflateAsync = promisify(inflate);
27
33
  const brotliDecompressAsync = promisify(brotliDecompress);
@@ -248,20 +254,47 @@ function isObjectRecord(value) {
248
254
  function tokenNumber(value) {
249
255
  return typeof value === "number" && Number.isFinite(value) && value >= 0 ? Math.trunc(value) : null;
250
256
  }
257
/**
 * Add up the valid token counts stored under the given keys of `value`.
 *
 * Each property is passed through `tokenNumber`; entries it rejects are
 * ignored rather than counted as zero.
 *
 * @param {object|null|undefined} value - Object holding the counters.
 * @param {Iterable<string>} keys - Property names to read from `value`.
 * @returns {number|null} The sum of the valid counts, or null when `value`
 *   is falsy or none of the keys held a valid count.
 */
function sumTokenNumbers(value, keys) {
  if (!value) {
    return null;
  }
  const counts = [];
  for (const key of keys) {
    const count = tokenNumber(value[key]);
    if (count !== null) {
      counts.push(count);
    }
  }
  if (counts.length === 0) {
    return null;
  }
  return counts.reduce((sum, count) => sum + count, 0);
}
251
272
  /**
   * Normalize a usage object into the gateway's token counters.
   *
   * Reads `input_tokens`/`prompt_tokens`, `output_tokens`/`completion_tokens`,
   * `total_tokens`, and (in the added lines) several cache-related fields.
   * Returns null when `value` is not an object record or when every counter
   * is null.
   *
   * This block is shown as a diff: lines prefixed with `-` are the removed
   * 2.0.6 statements, lines prefixed with `+` are the 2.0.8 replacements.
   */
  function normalizeTokenUsage(value) {
252
273
  if (!isObjectRecord(value)) {
253
274
  return null;
254
275
  }
255
276
  const inputTokens = tokenNumber(value.input_tokens ?? value.prompt_tokens);
256
277
  const outputTokens = tokenNumber(value.output_tokens ?? value.completion_tokens);
257
- const totalTokens = tokenNumber(value.total_tokens) ?? (inputTokens !== null || outputTokens !== null ? (inputTokens ?? 0) + (outputTokens ?? 0) : null);
258
- if (inputTokens === null && outputTokens === null && totalTokens === null) {
  // Nested detail objects are used only when they are object records.
278
+ const inputDetails = isObjectRecord(value.input_tokens_details) ? value.input_tokens_details : null;
279
+ const promptDetails = isObjectRecord(value.prompt_tokens_details) ? value.prompt_tokens_details : null;
280
+ const cacheCreation = isObjectRecord(value.cache_creation) ? value.cache_creation : null;
  // A cached_tokens value nested under *_tokens_details takes precedence over
  // the top-level cache_read_input_tokens / cached_tokens fields.
281
+ const openAiCachedTokens = tokenNumber(inputDetails?.cached_tokens ?? promptDetails?.cached_tokens);
282
+ const cacheReadTokens = openAiCachedTokens ?? tokenNumber(value.cache_read_input_tokens ?? value.cached_tokens);
  // Cache-creation count: top-level fields first, then the nested detail
  // fields, then the sum of the two ephemeral counters in cache_creation.
283
+ const cacheCreationTokens = tokenNumber(value.cache_creation_input_tokens ?? value.cache_creation_tokens) ?? tokenNumber(inputDetails?.cache_creation_tokens ?? promptDetails?.cache_creation_tokens) ?? sumTokenNumbers(cacheCreation, ["ephemeral_5m_input_tokens", "ephemeral_1h_input_tokens"]);
  // When the nested cached_tokens value exists, the input count is treated as
  // already containing the cache reads, so they are not added a second time.
284
+ const inputIncludesCacheRead = openAiCachedTokens !== null;
  // Fallback total, used only when total_tokens itself is missing or invalid.
285
+ const inferredTotalTokens = inputTokens !== null || outputTokens !== null || cacheReadTokens !== null || cacheCreationTokens !== null ? (inputTokens ?? 0) + (outputTokens ?? 0) + (inputIncludesCacheRead ? 0 : cacheReadTokens ?? 0) + (cacheCreationTokens ?? 0) : null;
286
+ const totalTokens = tokenNumber(value.total_tokens) ?? inferredTotalTokens;
  // Input minus cache reads (clamped at 0) when the input includes them;
  // otherwise the input count unchanged. Null when there is no input count.
287
+ const uncachedInputTokens = inputTokens !== null ? inputIncludesCacheRead ? Math.max(0, inputTokens - (cacheReadTokens ?? 0)) : inputTokens : null;
288
+ if (inputTokens === null && outputTokens === null && totalTokens === null && cacheReadTokens === null && cacheCreationTokens === null) {
259
289
  return null;
260
290
  }
261
291
  return {
262
292
  inputTokens,
293
+ uncachedInputTokens,
263
294
  outputTokens,
264
- totalTokens
295
+ totalTokens,
296
+ cacheCreationTokens,
297
+ cacheReadTokens
265
298
  };
266
299
  }
267
300
  function extractTokenUsage(value, depth = 0) {
@@ -302,6 +335,40 @@ function imageUsageToTokenUsage(usage) {
302
335
  totalTokens: usage.total_tokens
303
336
  };
304
337
  }
338
/**
 * Build a Responses-style `usage` object from normalized token usage.
 *
 * Missing input/output counts are reported as 0, and a missing total falls
 * back to input + output. The cache fields are emitted only when the
 * corresponding counter is a valid token number.
 *
 * @param {object|null|undefined} usage - Normalized usage with camelCase
 *   counters (`inputTokens`, `outputTokens`, `totalTokens`,
 *   `cacheReadTokens`, `cacheCreationTokens`).
 * @returns {object|undefined} The snake_case payload, or undefined when
 *   `usage` is falsy.
 */
function buildResponsesUsagePayload(usage) {
  if (!usage) {
    return void 0;
  }
  const input = tokenNumber(usage.inputTokens) ?? 0;
  const output = tokenNumber(usage.outputTokens) ?? 0;
  const total = tokenNumber(usage.totalTokens) ?? input + output;
  const cacheRead = tokenNumber(usage.cacheReadTokens);
  const cacheCreation = tokenNumber(usage.cacheCreationTokens);
  const payload = {
    input_tokens: input,
    output_tokens: output,
    total_tokens: total
  };
  if (cacheRead !== null) {
    payload.input_tokens_details = { cached_tokens: cacheRead };
  }
  if (cacheCreation !== null) {
    payload.cache_creation_input_tokens = cacheCreation;
  }
  return payload;
}
355
/**
 * Build a Chat-Completions-style `usage` object from normalized token usage.
 *
 * Missing prompt/completion counts are reported as 0, and a missing total
 * falls back to prompt + completion. The cache fields are emitted only when
 * the corresponding counter is a valid token number.
 *
 * @param {object|null|undefined} usage - Normalized usage with camelCase
 *   counters (`inputTokens`, `outputTokens`, `totalTokens`,
 *   `cacheReadTokens`, `cacheCreationTokens`).
 * @returns {object|undefined} The snake_case payload, or undefined when
 *   `usage` is falsy.
 */
function buildChatCompletionsUsagePayload(usage) {
  if (!usage) {
    return void 0;
  }
  const prompt = tokenNumber(usage.inputTokens) ?? 0;
  const completion = tokenNumber(usage.outputTokens) ?? 0;
  const total = tokenNumber(usage.totalTokens) ?? prompt + completion;
  const cacheRead = tokenNumber(usage.cacheReadTokens);
  const cacheCreation = tokenNumber(usage.cacheCreationTokens);
  const cacheReadPart = cacheRead === null ? {} : { prompt_tokens_details: { cached_tokens: cacheRead } };
  const cacheCreationPart = cacheCreation === null ? {} : { cache_creation_input_tokens: cacheCreation };
  return {
    prompt_tokens: prompt,
    completion_tokens: completion,
    total_tokens: total,
    ...cacheReadPart,
    ...cacheCreationPart
  };
}
305
372
  function extractUsageErrorType(details, statusCode) {
306
373
  const error = isObjectRecord(details?.error) ? details.error : null;
307
374
  const upstreamErrorCode = error?.upstreamErrorCode;
@@ -610,9 +677,23 @@ function summarizeResponsesRequest(data, endpoint = "/v1/responses") {
610
677
  toolNamesTruncated: toolNames.length > 50,
611
678
  toolChoice: typeof data.tool_choice === "undefined" ? "default" : typeof data.tool_choice,
612
679
  parallelToolCalls: data.parallel_tool_calls,
613
- hasReasoning: Boolean(data.reasoning)
680
+ hasReasoning: Boolean(data.reasoning),
681
+ hasPreviousResponseId: Boolean(getPreviousResponseId(data))
614
682
  };
615
683
  }
684
/**
 * Find the previous response id attached to a request body.
 *
 * The top-level `previous_response_id` wins; otherwise the value nested at
 * `experimental_codex.body.previous_response_id` is used. Only non-blank
 * strings count, and the result is trimmed.
 *
 * @param {object} data - Parsed request body.
 * @returns {string|undefined} The trimmed id, or undefined when neither
 *   location holds a non-blank string.
 */
function getPreviousResponseId(data) {
  const trimmedOrEmpty = (candidate) => typeof candidate === "string" ? candidate.trim() : "";
  const topLevel = trimmedOrEmpty(data.previous_response_id);
  if (topLevel) {
    return topLevel;
  }
  const nested = trimmedOrEmpty(data.experimental_codex?.body?.previous_response_id);
  return nested || void 0;
}
692
/**
 * Return a shallow copy of `body` without its top-level
 * `previous_response_id` property. The input object is not modified.
 *
 * @param {object} body - Upstream request body.
 * @returns {object} A new object holding every other own enumerable property.
 */
function removePreviousResponseId(body) {
  // `?? {}` keeps the copy-of-nothing result that object spread gives for
  // null/undefined instead of throwing during destructuring.
  const { previous_response_id: _dropped, ...remaining } = body ?? {};
  return remaining;
}
616
697
  function createResponsesCodexBody(data) {
617
698
  const experimentalBody = data.experimental_codex?.body ?? {};
618
699
  const body = {
@@ -774,6 +855,7 @@ function summarizeCodexChatBody(body) {
774
855
  model: body.model ?? "default",
775
856
  stream: body.stream,
776
857
  store: body.store,
858
+ hasPromptCacheKey: typeof body.prompt_cache_key === "string" && body.prompt_cache_key.trim().length > 0,
777
859
  inputItems: Array.isArray(body.input) ? body.input.length : void 0,
778
860
  tools: Array.isArray(body.tools) ? body.tools.length : void 0,
779
861
  toolNames: toolNames.slice(0, 50),
@@ -914,10 +996,12 @@ function summarizeImageEditRequestForLog(body) {
914
996
  };
915
997
  }
916
998
  function buildResponseApiBody(result, includeRaw) {
999
+ const usage = buildResponsesUsagePayload(extractTokenUsage(result.raw));
917
1000
  const responseBody = {
918
1001
  object: "response",
919
1002
  provider: result.provider,
920
1003
  model: result.model,
1004
+ ...usage ? { usage } : {},
921
1005
  output_text: result.text,
922
1006
  output: [
923
1007
  {
@@ -942,11 +1026,13 @@ function buildResponseApiBody(result, includeRaw) {
942
1026
  }
943
1027
  function buildChatCompletionsBody(result) {
944
1028
  const hasToolCalls = result.toolCalls.length > 0;
1029
+ const usage = buildChatCompletionsUsagePayload(extractTokenUsage(result.raw));
945
1030
  const body = {
946
1031
  id: `chatcmpl_${randomUUID().replace(/-/g, "")}`,
947
1032
  object: "chat.completion",
948
1033
  created: Math.floor(Date.now() / 1e3),
949
1034
  model: result.model,
1035
+ ...usage ? { usage } : {},
950
1036
  choices: [
951
1037
  {
952
1038
  index: 0,
@@ -984,7 +1070,7 @@ function buildChatCompletionChunk(params) {
984
1070
  ]
985
1071
  };
986
1072
  }
987
- function sendChatCompletionsStream(reply, result) {
1073
+ function sendChatCompletionsStream(reply, result, includeUsage = false) {
988
1074
  const id = `chatcmpl_${randomUUID().replace(/-/g, "")}`;
989
1075
  const created = Math.floor(Date.now() / 1e3);
990
1076
  reply.raw.writeHead(200, {
@@ -1034,6 +1120,17 @@ function sendChatCompletionsStream(reply, result) {
1034
1120
  delta: {},
1035
1121
  finishReason: result.toolCalls.length > 0 ? "tool_calls" : "stop"
1036
1122
  }));
1123
+ const usage = includeUsage ? buildChatCompletionsUsagePayload(extractTokenUsage(result.raw)) : void 0;
1124
+ if (usage) {
1125
+ writeChatCompletionsSseEvent(reply, {
1126
+ id,
1127
+ object: "chat.completion.chunk",
1128
+ created,
1129
+ model: result.model,
1130
+ choices: [],
1131
+ usage
1132
+ });
1133
+ }
1037
1134
  reply.raw.write("data: [DONE]\n\n");
1038
1135
  reply.raw.end();
1039
1136
  }
@@ -1118,6 +1215,57 @@ function resolveOrigin(request) {
1118
1215
  }
1119
1216
  return "http://127.0.0.1:8787";
1120
1217
  }
1218
/**
 * Report whether a host name or address refers to the local machine.
 *
 * Recognizes `localhost` (case-insensitively, since host names are not
 * case-sensitive), the IPv6 loopback `::1` with or without brackets, and any
 * dotted-quad address in 127.0.0.0/8 rather than only `127.0.0.1`.
 *
 * @param {unknown} host - Host without a port, e.g. taken from a Host header.
 * @returns {boolean} True for loopback hosts; false otherwise, including for
 *   non-string input.
 */
function isLoopbackHost(host) {
  if (typeof host !== "string") {
    return false;
  }
  const normalized = host.trim().toLowerCase();
  if (normalized === "localhost" || normalized === "::1" || normalized === "[::1]") {
    return true;
  }
  // Require a full dotted quad so names such as "127.example.com" stay non-loopback.
  return /^127(?:\.\d{1,3}){3}$/.test(normalized);
}
1221
/**
 * Report whether a dotted-quad IPv4 address lies in one of the private
 * ranges 10.0.0.0/8, 172.16.0.0/12 or 192.168.0.0/16.
 *
 * The address is parsed into four octets first, so strings that merely start
 * with a private-looking prefix (for example "10.example.com") are rejected.
 *
 * @param {unknown} address - Candidate IPv4 address.
 * @returns {boolean} True only for a well-formed address in a private range.
 */
function isPrivateIpv4(address) {
  if (typeof address !== "string") {
    return false;
  }
  const match = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(address);
  if (!match) {
    return false;
  }
  const octets = match.slice(1).map((part) => Number.parseInt(part, 10));
  if (octets.some((octet) => octet > 255)) {
    return false;
  }
  const [first, second] = octets;
  if (first === 10) {
    return true;
  }
  if (first === 192 && second === 168) {
    return true;
  }
  return first === 172 && second >= 16 && second <= 31;
}
1235
/**
 * List the machine's external IPv4 addresses, one entry per distinct address.
 *
 * Interfaces flagged as internal are skipped, as are `0.0.0.0`, anything
 * starting with `127.` and anything starting with `169.254.`. Addresses that
 * `isPrivateIpv4` accepts are listed first; within each group the order is
 * by address using an "en" locale comparison.
 *
 * @returns {Array<{address: string, label: string}>} Each address paired
 *   with the name of the interface it was first seen on.
 */
function getLanIpv4Addresses() {
  const byAddress = new Map();
  for (const [label, entries] of Object.entries(networkInterfaces())) {
    for (const entry of entries ?? []) {
      // The family is compared as a string so both "IPv4" and 4 are accepted.
      const family = String(entry.family);
      if (family !== "IPv4" && family !== "4") {
        continue;
      }
      if (entry.internal || byAddress.has(entry.address)) {
        continue;
      }
      const { address } = entry;
      const unusable = address === "0.0.0.0" || address.startsWith("127.") || address.startsWith("169.254.");
      if (unusable) {
        continue;
      }
      byAddress.set(address, { address, label, private: isPrivateIpv4(address) });
    }
  }
  const ranked = [...byAddress.values()];
  ranked.sort((left, right) => Number(right.private) - Number(left.private) || left.address.localeCompare(right.address, "en"));
  return ranked.map(({ address, label }) => ({ address, label }));
}
1259
/**
 * Build the set of shareable URLs for one host.
 *
 * An IPv6 literal that is not already bracketed is wrapped in `[...]` when
 * the origin is built, because a bare IPv6 address followed by `:port` is
 * not a valid URL authority. The returned `host` field is left as given.
 *
 * @param {string} protocol - URL scheme without `://`, e.g. "http".
 * @param {string} host - Host name or IP address.
 * @param {number|string} port - Port appended to the origin.
 * @param {string} label - Display label stored alongside the URLs.
 * @returns {{host: string, label: string, adminUrl: string, baseUrl: string, codexBaseUrl: string}}
 */
function createShareAddress(protocol, host, port, label) {
  // Only IPv6 literals contain ":"; IPv4 addresses and host names never do.
  const needsBrackets = typeof host === "string" && host.includes(":") && !host.startsWith("[");
  const authorityHost = needsBrackets ? `[${host}]` : host;
  const origin = `${protocol}://${authorityHost}:${port}`;
  return {
    host,
    label,
    adminUrl: `${origin}/`,
    baseUrl: `${origin}/v1`,
    codexBaseUrl: `${origin}/codex/v1`
  };
}
1121
1269
  function normalizeError(error) {
1122
1270
  return error instanceof Error ? error : new Error(String(error));
1123
1271
  }
@@ -1139,18 +1287,39 @@ function getErrorStatusCode(error) {
1139
1287
  }
1140
1288
  return 500;
1141
1289
  }
1142
- function isQuotaLimitError(error) {
1143
- const normalized = normalizeError(error);
1144
- const marker = `${normalized.upstreamErrorCode ?? ""} ${normalized.upstreamErrorType ?? ""} ${normalized.message}`.toLowerCase();
1145
- return normalized.upstreamStatus === 429 || marker.includes("usage_limit_reached");
1290
/**
 * Format a byte count for display, to one decimal place.
 *
 * The count is divided by BYTES_PER_MIB, but the suffix written is " MB".
 * Anything that is not a finite number greater than zero yields the escaped
 * literal "\u672A\u77E5" (the Chinese text for "unknown").
 *
 * In this diff the closing brace below is a context line shared with the
 * removed isQuotaLimitError function.
 */
+ function formatBytesAsMiB(bytes) {
1291
+ if (typeof bytes !== "number" || !Number.isFinite(bytes) || bytes <= 0) {
1292
+ return "\u672A\u77E5";
1293
+ }
1294
// Multiply by 10, round, divide by 10: keeps a single decimal digit.
+ return `${Math.round(bytes / BYTES_PER_MIB * 10) / 10} MB`;
1146
1295
  }
1147
1296
  /**
   * Create the empty accumulator that trackSseChunk updates while a stream
   * is relayed.
   *
   * Lines prefixed with `-` are the removed 2.0.6 version; lines prefixed
   * with `+` are the 2.0.8 additions.
   */
  function createSseStreamStats() {
1148
1297
  return {
1149
1298
  // Text buffer that trackSseChunk trims to its last 65536 characters.
  buffer: "",
1150
1299
  // Running byte total of every chunk passed to trackSseChunk.
  bytes: 0,
1151
- completed: false
1300
  // Set to true once a response.completed or response.done event is seen.
+ completed: false,
1301
  // Distinct "resp_"-prefixed ids found in parsed events.
+ responseIds: /* @__PURE__ */ new Set(),
1302
  // Most recent usage extracted from a terminal event; null until one is found.
+ tokenUsage: null,
1303
  // Incremented each time parsing an event payload throws.
+ parseErrorCount: 0
1152
1304
  };
1153
1305
  }
1306
/**
 * Pull a response id out of a parsed SSE event.
 *
 * The event's own `id` is preferred; otherwise `response.id` is used. Only
 * strings beginning with "resp_" qualify.
 *
 * @param {unknown} value - Parsed event payload.
 * @returns {string|undefined} The matching id, or undefined when the payload
 *   is not an object record or neither location holds a qualifying id.
 */
function extractSseResponseId(value) {
  if (!isObjectRecord(value)) {
    return void 0;
  }
  const looksLikeResponseId = (candidate) => typeof candidate === "string" && candidate.startsWith("resp_");
  const ownId = value.id;
  if (looksLikeResponseId(ownId)) {
    return ownId;
  }
  const nested = value.response;
  if (!isObjectRecord(nested)) {
    return void 0;
  }
  const nestedId = nested.id;
  return looksLikeResponseId(nestedId) ? nestedId : void 0;
}
1320
/**
 * Report whether an SSE event type is one of the four terminal response
 * events: completed, done, failed or incomplete.
 *
 * @param {unknown} eventType - The event's `type` value.
 * @returns {boolean} True only for an exact match with one of the four names.
 */
function isSseTerminalUsageEvent(eventType) {
  switch (eventType) {
    case "response.completed":
    case "response.done":
    case "response.failed":
    case "response.incomplete":
      return true;
    default:
      return false;
  }
}
1154
1323
  function trackSseChunk(stats, chunk) {
1155
1324
  const text = typeof chunk === "string" ? chunk : chunk instanceof Uint8Array ? Buffer.from(chunk).toString("utf8") : String(chunk);
1156
1325
  stats.bytes += Buffer.byteLength(text);
@@ -1168,10 +1337,19 @@ function trackSseChunk(stats, chunk) {
1168
1337
  if (typeof parsed.type === "string") {
1169
1338
  eventType = parsed.type;
1170
1339
  }
1340
+ const responseId = extractSseResponseId(parsed);
1341
+ if (responseId) {
1342
+ stats.responseIds.add(responseId);
1343
+ }
1344
+ const tokenUsage = isSseTerminalUsageEvent(eventType) ? extractTokenUsage(parsed) : null;
1345
+ if (tokenUsage) {
1346
+ stats.tokenUsage = tokenUsage;
1347
+ }
1171
1348
  } catch {
1349
+ stats.parseErrorCount += 1;
1172
1350
  }
1173
1351
  }
1174
- if (eventType === "response.completed") {
1352
+ if (eventType === "response.completed" || eventType === "response.done") {
1175
1353
  stats.completed = true;
1176
1354
  stats.terminalEvent = eventType;
1177
1355
  } else if (eventType === "response.failed" || eventType === "response.incomplete") {
@@ -1183,10 +1361,30 @@ function trackSseChunk(stats, chunk) {
1183
1361
  stats.buffer = stats.buffer.slice(-65536);
1184
1362
  }
1185
1363
  }
1364
/**
 * Describe why token usage was or was not captured for a relayed stream.
 *
 * Checks run in this order: usage present, status code outside 200-399, no
 * terminal event (split by whether any parse error occurred), then whether
 * the recorded terminal event is one of the known terminal types.
 *
 * @param {object} stats - Accumulator from `createSseStreamStats`.
 * @param {number} statusCode - Upstream HTTP status.
 * @returns {"captured"|"upstream_error"|"parse_failed"|"missing_terminal"|"terminal_without_usage"|"not_returned"}
 */
function sseTokenUsageStatus(stats, statusCode) {
  if (stats.tokenUsage) {
    return "captured";
  }
  // Written as two comparisons (not a negated range test) so a non-numeric
  // status falls through instead of being reported as an upstream error.
  const outsideSuccessRange = statusCode < 200 || statusCode >= 400;
  if (outsideSuccessRange) {
    return "upstream_error";
  }
  const { terminalEvent } = stats;
  if (!terminalEvent) {
    return stats.parseErrorCount > 0 ? "parse_failed" : "missing_terminal";
  }
  return isSseTerminalUsageEvent(terminalEvent) ? "terminal_without_usage" : "not_returned";
}
1186
1382
  function createApp(params) {
1383
+ const defaultBodyLimit = params?.bodyLimit ?? DEFAULT_ROUTE_BODY_LIMIT_BYTES;
1384
+ const codexCompactBodyLimit = Math.max(defaultBodyLimit, CODEX_COMPACT_BODY_LIMIT_BYTES);
1187
1385
  const app = Fastify({
1188
1386
  logger: false,
1189
- bodyLimit: params?.bodyLimit
1387
+ bodyLimit: defaultBodyLimit
1190
1388
  });
1191
1389
  app.removeContentTypeParser("application/json");
1192
1390
  app.addContentTypeParser(
@@ -1198,6 +1396,22 @@ function createApp(params) {
1198
1396
  );
1199
1397
  const ctx = createGatewayContext();
1200
1398
  const gatewayRequestLogs = [];
1399
+ const codexResponseProfileBindings = /* @__PURE__ */ new Map();
1400
/**
 * Record which profile produced a response id, keeping at most
 * MAX_CODEX_RESPONSE_PROFILE_BINDINGS entries.
 *
 * A Map iterates in insertion order, so the entry is deleted before it is
 * set again: this moves a re-recorded id to the end and keeps iteration
 * order equal to recency. The oldest bindings can then be evicted straight
 * from the front of the map, without copying and sorting every entry on
 * each insert once the map is full.
 *
 * @param {string} responseId - Response id seen on the stream.
 * @param {{profileId: string, accountId?: string}} profile - Profile that
 *   served the response.
 * @returns {void}
 */
function rememberCodexResponseProfile(responseId, profile) {
  codexResponseProfileBindings.delete(responseId);
  codexResponseProfileBindings.set(responseId, {
    profileId: profile.profileId,
    accountId: profile.accountId,
    seenAt: Date.now()
  });
  while (codexResponseProfileBindings.size > MAX_CODEX_RESPONSE_PROFILE_BINDINGS) {
    // The first key in iteration order is the least recently recorded one.
    const oldestKey = codexResponseProfileBindings.keys().next().value;
    codexResponseProfileBindings.delete(oldestKey);
  }
}
1201
1415
  function pushGatewayRequestLog(log) {
1202
1416
  const entry = {
1203
1417
  id: log.id ?? randomUUID(),
@@ -1231,6 +1445,7 @@ function createApp(params) {
1231
1445
  accountLabel: entry.account,
1232
1446
  planType: profile?.quota?.planType,
1233
1447
  tokenUsage: log.usage?.tokenUsage,
1448
+ tokenUsageStatus: log.usage?.tokenUsageStatus,
1234
1449
  imageCount: log.usage?.imageCount,
1235
1450
  imageRoute: log.usage?.imageRoute ?? "none",
1236
1451
  errorType: log.usage?.errorType ?? extractUsageErrorType(log.details, entry.statusCode)
@@ -1246,18 +1461,22 @@ function createApp(params) {
1246
1461
  app.setErrorHandler((error, request, reply) => {
1247
1462
  const normalized = normalizeError(error);
1248
1463
  const statusCode = getErrorStatusCode(normalized);
1464
+ const isBodyTooLarge = statusCode === 413;
1465
+ const message = isBodyTooLarge ? `\u8BF7\u6C42\u4F53\u8FC7\u5927\uFF0C\u5F53\u524D\u7F51\u5173\u9ED8\u8BA4\u4E0A\u9650 ${formatBytesAsMiB(defaultBodyLimit)}\uFF0CCodex compact \u4E0A\u9650 ${formatBytesAsMiB(codexCompactBodyLimit)}\u3002\u5982\u4ECD\u4E0D\u591F\uFF0C\u8BF7\u7528 AZT_BODY_LIMIT_MB \u8C03\u5927\u540E\u91CD\u542F\u7F51\u5173\u3002` : normalized.message;
1249
1466
  console.error("[gateway:error]", {
1250
1467
  method: request.method,
1251
1468
  url: request.url,
1252
1469
  statusCode,
1253
- message: normalized.message,
1470
+ message,
1471
+ code: normalized.code,
1472
+ upstreamRequestId: normalized.requestId,
1254
1473
  stack: normalized.stack
1255
1474
  });
1256
1475
  reply.code(statusCode);
1257
1476
  return {
1258
1477
  error: {
1259
1478
  type: "gateway_error",
1260
- message: normalized.message
1479
+ message
1261
1480
  }
1262
1481
  };
1263
1482
  });
@@ -1265,6 +1484,7 @@ function createApp(params) {
1265
1484
  data: gatewayRequestLogs
1266
1485
  }));
1267
1486
  app.get("/_gateway/admin/usage", async () => ctx.usageService.getSummary());
1487
+ app.post("/_gateway/admin/usage/reset", async () => ctx.usageService.backupAndReset());
1268
1488
  async function buildAdminConfig(request) {
1269
1489
  const [status, models, modelCatalog, versionStatus, settings, profile, profiles, codexStatus, usage] = await Promise.all([
1270
1490
  ctx.authService.getStatus(),
@@ -1404,6 +1624,26 @@ function createApp(params) {
1404
1624
  };
1405
1625
  });
1406
1626
  app.get("/_gateway/admin/config", async (request) => buildAdminConfig(request));
1627
+ app.get("/_gateway/admin/share", async (request) => {
1628
+ const status = await ctx.authService.getStatus();
1629
+ const protocol = request.protocol === "https" ? "https" : "http";
1630
+ const port = request.raw.socket.localPort || status.serverPort;
1631
+ const serverHost = status.serverHost || "0.0.0.0";
1632
+ const lanReachable = serverHost === "0.0.0.0" || serverHost === "::" || !isLoopbackHost(serverHost);
1633
+ const addresses = getLanIpv4Addresses().map((item) => createShareAddress(protocol, item.address, port, item.label));
1634
+ const requestHost = request.headers.host?.replace(/:\d+$/u, "");
1635
+ if (requestHost && !isLoopbackHost(requestHost) && !addresses.some((item) => item.host === requestHost)) {
1636
+ addresses.unshift(createShareAddress(protocol, requestHost, port, "\u5F53\u524D\u8BBF\u95EE\u5730\u5740"));
1637
+ }
1638
+ return {
1639
+ primary: lanReachable ? addresses[0] ?? null : null,
1640
+ addresses,
1641
+ local: createShareAddress(protocol, "127.0.0.1", port, "\u672C\u673A"),
1642
+ serverHost,
1643
+ serverPort: port,
1644
+ lanReachable
1645
+ };
1646
+ });
1407
1647
  app.post("/_gateway/admin/login", async (request) => {
1408
1648
  await ctx.authService.login("openai-codex");
1409
1649
  await ctx.authService.syncActiveProfileQuota("openai-codex", {
@@ -1776,20 +2016,68 @@ function createApp(params) {
1776
2016
  const abortController = new AbortController();
1777
2017
  let streamFinished = false;
1778
2018
  let headersCommitted = false;
2019
+ let clientDisconnected = false;
2020
+ let clientDrainTimer = null;
1779
2021
  let profile = null;
1780
2022
  let retryCount = 0;
1781
2023
  let failureRecorded = false;
1782
2024
  let codexImageRoute = "none";
2025
+ const originalPreviousResponseId = getPreviousResponseId(data);
2026
+ let adventureFallbackUsed = false;
2027
+ let adventureFallbackReason;
1783
2028
  reply.raw.on("close", () => {
1784
2029
  if (!streamFinished) {
1785
- abortController.abort();
2030
+ clientDisconnected = true;
2031
+ if (!headersCommitted) {
2032
+ abortController.abort();
2033
+ return;
2034
+ }
2035
+ clientDrainTimer = setTimeout(() => {
2036
+ abortController.abort();
2037
+ }, CODEX_STREAM_DRAIN_AFTER_CLIENT_CLOSE_MS);
2038
+ clientDrainTimer.unref?.();
1786
2039
  }
1787
2040
  });
1788
2041
  try {
1789
2042
  const model = await ctx.modelService.resolveModel("openai-codex", data.model, {
1790
2043
  allowUnknown: data.experimental_codex?.allow_unknown_model
1791
2044
  });
1792
- const codexBody = createCodexPassthroughBody(data, model);
2045
+ let codexBody = createCodexPassthroughBody(data, model);
2046
+ let activePreviousResponseId = originalPreviousResponseId;
2047
+ let keepProfileSticky = Boolean(activePreviousResponseId);
2048
+ let stickyProfileId = activePreviousResponseId ? codexResponseProfileBindings.get(activePreviousResponseId)?.profileId : void 0;
2049
+ const useAdventureFallback = async (error, quota) => {
2050
+ if (!keepProfileSticky || abortController.signal.aborted) {
2051
+ return false;
2052
+ }
2053
+ const failedProfileId = profile?.profileId ?? stickyProfileId;
2054
+ if (failedProfileId) {
2055
+ await ctx.authService.recordProfileRequestFailure(failedProfileId, error, quota, "openai-codex", {
2056
+ skipAutoSwitch: true
2057
+ });
2058
+ }
2059
+ codexBody = removePreviousResponseId(codexBody);
2060
+ activePreviousResponseId = void 0;
2061
+ keepProfileSticky = false;
2062
+ stickyProfileId = void 0;
2063
+ adventureFallbackUsed = true;
2064
+ adventureFallbackReason = error instanceof Error ? error.message : String(error);
2065
+ retryCount += 1;
2066
+ profile = null;
2067
+ failureRecorded = false;
2068
+ console.warn("[gateway:codex:stream] sticky continuation failed; dropping previous_response_id and retrying as new session", {
2069
+ requestId: request.id,
2070
+ model,
2071
+ retryCount,
2072
+ previousResponseId: "[present]",
2073
+ failedAccount: failedProfileId,
2074
+ errorCode: error.code,
2075
+ upstreamStatus: error.upstreamStatus,
2076
+ upstreamRequestId: error.requestId,
2077
+ message: adventureFallbackReason
2078
+ });
2079
+ return true;
2080
+ };
1793
2081
  const imageRequest = upstreamEndpoint === "responses" ? extractCodexImageGenerationRequest(codexBody) : null;
1794
2082
  if (imageRequest) {
1795
2083
  codexImageRoute = "codex-tool";
@@ -1867,9 +2155,13 @@ function createApp(params) {
1867
2155
  }
1868
2156
  let upstream = null;
1869
2157
  const maxProfileAttempts = 5;
2158
+ const maxTransientStreamRetries = 1;
2159
+ let transientStreamRetryCount = 0;
1870
2160
  for (let attempt = 0; attempt < maxProfileAttempts; attempt += 1) {
1871
- profile = await ctx.authService.requireUsableProfile("openai-codex");
1872
2161
  try {
2162
+ profile = stickyProfileId ? await ctx.authService.requireUsableProfileById(stickyProfileId, "openai-codex") : await ctx.authService.requireUsableProfile("openai-codex", {
2163
+ skipAutoSwitch: keepProfileSticky
2164
+ });
1873
2165
  upstream = await streamOpenAICodex({
1874
2166
  profile,
1875
2167
  model,
@@ -1881,9 +2173,31 @@ function createApp(params) {
1881
2173
  break;
1882
2174
  } catch (error) {
1883
2175
  const quota = error.quota;
1884
- const switchedProfile = await ctx.authService.recordProfileRequestFailure(profile.profileId, error, quota, "openai-codex");
2176
+ if (keepProfileSticky && attempt < maxProfileAttempts - 1 && await useAdventureFallback(error, quota)) {
2177
+ continue;
2178
+ }
2179
+ if (!keepProfileSticky && isTransientHttpError(error) && transientStreamRetryCount < maxTransientStreamRetries && attempt < maxProfileAttempts - 1 && !abortController.signal.aborted) {
2180
+ transientStreamRetryCount += 1;
2181
+ retryCount += 1;
2182
+ console.warn("[gateway:codex:stream] transient curl stream failure before headers; retrying request", {
2183
+ requestId: request.id,
2184
+ account: profileLogLabel(profile),
2185
+ model,
2186
+ retryCount,
2187
+ errorCode: error.code,
2188
+ upstreamRequestId: error.requestId,
2189
+ message: error instanceof Error ? error.message : String(error)
2190
+ });
2191
+ continue;
2192
+ }
2193
+ if (!profile) {
2194
+ throw error;
2195
+ }
2196
+ const switchedProfile = await ctx.authService.recordProfileRequestFailure(profile.profileId, error, quota, "openai-codex", {
2197
+ skipAutoSwitch: keepProfileSticky
2198
+ });
1885
2199
  failureRecorded = true;
1886
- if (attempt < maxProfileAttempts - 1 && isQuotaLimitError(error) && switchedProfile && switchedProfile.profileId !== profile.profileId && !abortController.signal.aborted) {
2200
+ if (!keepProfileSticky && attempt < maxProfileAttempts - 1 && ctx.authService.isRotationTrigger(error, quota) && switchedProfile && switchedProfile.profileId !== profile.profileId && !abortController.signal.aborted) {
1887
2201
  retryCount += 1;
1888
2202
  failureRecorded = false;
1889
2203
  continue;
@@ -1894,13 +2208,18 @@ function createApp(params) {
1894
2208
  if (!upstream || !profile) {
1895
2209
  throw new Error("Codex stream \u672A\u80FD\u5EFA\u7ACB\u3002");
1896
2210
  }
1897
- await ctx.authService.recordProfileRequestSuccess(profile.profileId, upstream.quota, "openai-codex");
2211
+ await ctx.authService.recordProfileRequestSuccess(profile.profileId, upstream.quota, "openai-codex", {
2212
+ skipAutoSwitch: keepProfileSticky
2213
+ });
1898
2214
  const headers = {
1899
2215
  "Content-Type": upstream.headers["content-type"] ?? "text/event-stream; charset=utf-8",
1900
2216
  "Cache-Control": "no-cache, no-transform",
1901
2217
  Connection: "keep-alive",
1902
2218
  "X-Accel-Buffering": "no"
1903
2219
  };
2220
+ if (adventureFallbackUsed) {
2221
+ headers["X-AZT-Codex-Continuation-Mode"] = "adventure-fallback";
2222
+ }
1904
2223
  for (const [key, value] of Object.entries(upstream.headers)) {
1905
2224
  if (key.startsWith("x-codex-") || key === "x-request-id") {
1906
2225
  headers[key] = value;
@@ -1910,14 +2229,42 @@ function createApp(params) {
1910
2229
  headersCommitted = true;
1911
2230
  reply.raw.flushHeaders?.();
1912
2231
  const streamStats = createSseStreamStats();
2232
+ const writeChunkToClient = async (chunk) => {
2233
+ if (clientDisconnected || reply.raw.destroyed || reply.raw.writableEnded) {
2234
+ clientDisconnected = true;
2235
+ return;
2236
+ }
2237
+ try {
2238
+ if (!reply.raw.write(chunk)) {
2239
+ await new Promise((resolve) => {
2240
+ const cleanup = () => {
2241
+ reply.raw.off("drain", cleanup);
2242
+ reply.raw.off("close", cleanup);
2243
+ resolve();
2244
+ };
2245
+ reply.raw.once("drain", cleanup);
2246
+ reply.raw.once("close", cleanup);
2247
+ });
2248
+ }
2249
+ } catch {
2250
+ clientDisconnected = true;
2251
+ }
2252
+ };
1913
2253
  for await (const chunk of Readable.fromWeb(upstream.body)) {
1914
2254
  trackSseChunk(streamStats, chunk);
1915
- if (!reply.raw.write(chunk)) {
1916
- await new Promise((resolve) => reply.raw.once("drain", resolve));
1917
- }
2255
+ await writeChunkToClient(chunk);
1918
2256
  }
1919
2257
  streamFinished = true;
1920
- reply.raw.end();
2258
+ if (clientDrainTimer) {
2259
+ clearTimeout(clientDrainTimer);
2260
+ clientDrainTimer = null;
2261
+ }
2262
+ if (!clientDisconnected && !reply.raw.destroyed && !reply.raw.writableEnded) {
2263
+ reply.raw.end();
2264
+ }
2265
+ for (const responseId of streamStats.responseIds) {
2266
+ rememberCodexResponseProfile(responseId, profile);
2267
+ }
1921
2268
  if (!streamStats.completed) {
1922
2269
  console.warn("[gateway:codex:stream] upstream stream ended without response.completed", {
1923
2270
  requestId: request.id,
@@ -1947,21 +2294,39 @@ function createApp(params) {
1947
2294
  passthrough: true,
1948
2295
  upstreamEndpoint,
1949
2296
  retryCount,
2297
+ profileSticky: keepProfileSticky,
2298
+ previousResponseId: originalPreviousResponseId ? "[present]" : void 0,
2299
+ previousResponseDropped: adventureFallbackUsed,
2300
+ adventureFallbackReason: adventureFallbackUsed ? truncateForLog(adventureFallbackReason ?? "") : void 0,
2301
+ stickyProfileResolved: Boolean(stickyProfileId),
2302
+ responseIdsTracked: streamStats.responseIds.size,
1950
2303
  completed: streamStats.completed,
1951
2304
  terminalEvent: streamStats.terminalEvent,
1952
- bytes: streamStats.bytes
2305
+ bytes: streamStats.bytes,
2306
+ usageCaptured: Boolean(streamStats.tokenUsage),
2307
+ tokenUsageStatus: sseTokenUsageStatus(streamStats, upstream.status),
2308
+ parseErrorCount: streamStats.parseErrorCount,
2309
+ clientDisconnected
1953
2310
  }
1954
2311
  },
1955
2312
  usage: {
1956
2313
  profile,
2314
+ tokenUsage: streamStats.tokenUsage,
2315
+ tokenUsageStatus: sseTokenUsageStatus(streamStats, upstream.status),
1957
2316
  imageRoute: codexImageRoute
1958
2317
  }
1959
2318
  });
1960
2319
  return reply;
1961
2320
  } catch (error) {
2321
+ if (clientDrainTimer) {
2322
+ clearTimeout(clientDrainTimer);
2323
+ clientDrainTimer = null;
2324
+ }
1962
2325
  const quota = error.quota;
1963
2326
  if (profile && !failureRecorded) {
1964
- await ctx.authService.recordProfileRequestFailure(profile.profileId, error, quota, "openai-codex");
2327
+ await ctx.authService.recordProfileRequestFailure(profile.profileId, error, quota, "openai-codex", {
2328
+ skipAutoSwitch: Boolean(originalPreviousResponseId) && !adventureFallbackUsed
2329
+ });
1965
2330
  }
1966
2331
  const normalized = normalizeError(error);
1967
2332
  const statusCode = getErrorStatusCode(normalized);
@@ -1980,10 +2345,17 @@ function createApp(params) {
1980
2345
  request: summarizeResponsesRequest(data, request.url),
1981
2346
  response: {
1982
2347
  upstreamEndpoint,
1983
- retryCount
2348
+ retryCount,
2349
+ profileSticky: Boolean(originalPreviousResponseId) && !adventureFallbackUsed,
2350
+ previousResponseId: originalPreviousResponseId ? "[present]" : void 0,
2351
+ previousResponseDropped: adventureFallbackUsed,
2352
+ adventureFallbackReason: adventureFallbackUsed ? truncateForLog(adventureFallbackReason ?? "") : void 0,
2353
+ stickyProfileResolved: Boolean(originalPreviousResponseId && codexResponseProfileBindings.has(originalPreviousResponseId))
1984
2354
  },
1985
2355
  error: {
1986
2356
  message: normalized.message,
2357
+ code: normalized.code,
2358
+ upstreamRequestId: normalized.requestId,
1987
2359
  upstreamStatus: normalized.upstreamStatus,
1988
2360
  upstreamErrorCode: normalized.upstreamErrorCode,
1989
2361
  upstreamErrorMessage: normalized.upstreamErrorMessage
@@ -2034,7 +2406,7 @@ function createApp(params) {
2034
2406
  }
2035
2407
  return handleCodexResponsesPassthrough(request, reply, parsed.data, startedAt);
2036
2408
  });
2037
- app.post("/codex/v1/responses/compact", async (request, reply) => {
2409
+ app.post("/codex/v1/responses/compact", { bodyLimit: codexCompactBodyLimit }, async (request, reply) => {
2038
2410
  const startedAt = performance.now();
2039
2411
  const parsed = responsesBodySchema.safeParse(request.body);
2040
2412
  if (!parsed.success) {
@@ -2198,7 +2570,7 @@ function createApp(params) {
2198
2570
  });
2199
2571
  throw error;
2200
2572
  }
2201
- const activeProfile = await ctx.authService.getActiveProfile();
2573
+ const activeProfile = result.profile ?? await ctx.authService.getActiveProfile();
2202
2574
  pushGatewayRequestLog({
2203
2575
  method: request.method,
2204
2576
  endpoint: request.url,
@@ -2216,7 +2588,8 @@ function createApp(params) {
2216
2588
  response: {
2217
2589
  textPreview: truncateForLog(result.text),
2218
2590
  textLength: result.text.length,
2219
- artifactCount: result.artifacts.length
2591
+ artifactCount: result.artifacts.length,
2592
+ retryCount: result.retryCount ?? 0
2220
2593
  }
2221
2594
  },
2222
2595
  usage: {
@@ -2333,7 +2706,7 @@ function createApp(params) {
2333
2706
  });
2334
2707
  throw error;
2335
2708
  }
2336
- const activeProfile = await ctx.authService.getActiveProfile();
2709
+ const activeProfile = result.profile ?? await ctx.authService.getActiveProfile();
2337
2710
  pushGatewayRequestLog({
2338
2711
  method: request.method,
2339
2712
  endpoint: request.url,
@@ -2358,7 +2731,8 @@ function createApp(params) {
2358
2731
  argumentsPreview: truncateForLog(toolCall.function.arguments)
2359
2732
  })),
2360
2733
  artifactCount: result.artifacts.length,
2361
- stream: parsed.data.stream ?? false
2734
+ stream: parsed.data.stream ?? false,
2735
+ retryCount: result.retryCount ?? 0
2362
2736
  }
2363
2737
  },
2364
2738
  usage: {
@@ -2376,7 +2750,9 @@ function createApp(params) {
2376
2750
  artifactCount: result.artifacts.length
2377
2751
  });
2378
2752
  if (parsed.data.stream) {
2379
- sendChatCompletionsStream(reply, result);
2753
+ const rawStreamOptions = parsed.data.stream_options;
2754
+ const streamOptions = isObjectRecord(rawStreamOptions) ? rawStreamOptions : null;
2755
+ sendChatCompletionsStream(reply, result, streamOptions?.include_usage === true);
2380
2756
  return reply;
2381
2757
  }
2382
2758
  return buildChatCompletionsBody(result);