@adversity/coding-tool-x 3.1.1 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/CHANGELOG.md +24 -0
  2. package/dist/web/assets/{ConfigTemplates-ZrK_s7ma.js → ConfigTemplates-DvcbKKdS.js} +1 -1
  3. package/dist/web/assets/Home-BJKPCBuk.css +1 -0
  4. package/dist/web/assets/Home-Cw-F_Wnu.js +1 -0
  5. package/dist/web/assets/{PluginManager-BD7QUZbU.js → PluginManager-jy_4GVxI.js} +1 -1
  6. package/dist/web/assets/{ProjectList-DRb1DuHV.js → ProjectList-Df1-NcNr.js} +1 -1
  7. package/dist/web/assets/{SessionList-lZ0LKzfT.js → SessionList-UWcZtC2r.js} +1 -1
  8. package/dist/web/assets/{SkillManager-C1xG5B4Q.js → SkillManager-IRdseMKB.js} +1 -1
  9. package/dist/web/assets/{Terminal-DksBo_lM.js → Terminal-BasTyDut.js} +1 -1
  10. package/dist/web/assets/{WorkspaceManager-Burx7XOo.js → WorkspaceManager-D-D2kK1V.js} +1 -1
  11. package/dist/web/assets/index-CoB3zF0K.css +1 -0
  12. package/dist/web/assets/index-CryrSLv8.js +2 -0
  13. package/dist/web/index.html +2 -2
  14. package/package.json +1 -1
  15. package/src/config/default.js +2 -0
  16. package/src/config/model-metadata.js +415 -0
  17. package/src/config/model-pricing.js +23 -93
  18. package/src/server/api/opencode-channels.js +84 -6
  19. package/src/server/api/opencode-proxy.js +41 -32
  20. package/src/server/api/opencode-sessions.js +4 -62
  21. package/src/server/api/settings.js +111 -0
  22. package/src/server/codex-proxy-server.js +6 -4
  23. package/src/server/gemini-proxy-server.js +6 -4
  24. package/src/server/index.js +13 -4
  25. package/src/server/opencode-proxy-server.js +1197 -86
  26. package/src/server/proxy-server.js +6 -4
  27. package/src/server/services/codex-sessions.js +105 -6
  28. package/src/server/services/env-checker.js +24 -1
  29. package/src/server/services/env-manager.js +29 -1
  30. package/src/server/services/opencode-channels.js +3 -1
  31. package/src/server/services/opencode-sessions.js +486 -218
  32. package/src/server/services/opencode-settings-manager.js +172 -36
  33. package/src/server/services/response-decoder.js +21 -0
  34. package/src/server/websocket-server.js +24 -5
  35. package/dist/web/assets/Home-B8YfhZ3c.js +0 -1
  36. package/dist/web/assets/Home-Di2qsylF.css +0 -1
  37. package/dist/web/assets/index-Ufv5rCa5.css +0 -1
  38. package/dist/web/assets/index-lAkrRC3h.js +0 -2
@@ -17,7 +17,7 @@ const { resolvePricing } = require('./utils/pricing');
17
17
  const { recordRequest: recordOpenCodeRequest } = require('./services/opencode-statistics-service');
18
18
  const { saveProxyStartTime, clearProxyStartTime, getProxyStartTime, getProxyRuntime } = require('./services/proxy-runtime');
19
19
  const { getEnabledChannels, getEffectiveApiKey } = require('./services/opencode-channels');
20
- const { probeModelAvailability, fetchModelsFromProvider } = require('./services/model-detector');
20
+ const { probeModelAvailability, fetchModelsFromProvider, getCachedModelInfo } = require('./services/model-detector');
21
21
  const { CLAUDE_MODEL_PRICING } = require('../config/model-pricing');
22
22
 
23
23
  let proxyServer = null;
@@ -60,6 +60,21 @@ const GEMINI_CLI_CLIENT_METADATA = 'ideType=IDE_UNSPECIFIED,platform=PLATFORM_UN
60
60
  const CLAUDE_SESSION_USER_ID_TTL_MS = 60 * 60 * 1000;
61
61
  const CLAUDE_SESSION_USER_ID_CACHE_MAX = 2000;
62
62
  const claudeSessionUserIdCache = new Map();
63
// Lookup table: lowercase file extension (leading dot included) → MIME type.
// Consulted when an uploaded file block carries a filename but no explicit
// media type (see inferMimeTypeFromFilename).
const FILE_EXTENSION_MIME_TYPES = {
  // Documents
  '.pdf': 'application/pdf',
  '.txt': 'text/plain',
  '.md': 'text/markdown',
  '.csv': 'text/csv',
  '.json': 'application/json',
  '.xml': 'application/xml',
  '.html': 'text/html',
  // Images
  '.png': 'image/png',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.gif': 'image/gif',
  '.webp': 'image/webp',
  '.svg': 'image/svg+xml'
};
63
78
 
64
79
  /**
65
80
  * 检测模型层级
@@ -216,9 +231,17 @@ function calculateCost(model, tokens) {
216
231
  pricing = resolvePricing('opencode', pricing, OPENCODE_BASE_PRICING);
217
232
  const inputRate = typeof pricing.input === 'number' ? pricing.input : OPENCODE_BASE_PRICING.input;
218
233
  const outputRate = typeof pricing.output === 'number' ? pricing.output : OPENCODE_BASE_PRICING.output;
234
+ const cacheCreationRate = typeof pricing.cacheCreation === 'number' ? pricing.cacheCreation : inputRate * 1.25;
235
+ const cacheReadRate = typeof pricing.cacheRead === 'number' ? pricing.cacheRead : inputRate * 0.1;
236
+
237
+ const cacheCreationTokens = tokens.cacheCreation || 0;
238
+ const cacheReadTokens = tokens.cacheRead || 0;
239
+ const regularInputTokens = Math.max(0, (tokens.input || 0) - cacheCreationTokens - cacheReadTokens);
219
240
 
220
241
  return (
221
- (tokens.input || 0) * inputRate / ONE_MILLION +
242
+ regularInputTokens * inputRate / ONE_MILLION +
243
+ cacheCreationTokens * cacheCreationRate / ONE_MILLION +
244
+ cacheReadTokens * cacheReadRate / ONE_MILLION +
222
245
  (tokens.output || 0) * outputRate / ONE_MILLION
223
246
  );
224
247
  }
@@ -369,12 +392,20 @@ function getRequestPathname(urlPath = '') {
369
392
  }
370
393
  }
371
394
 
395
+ function normalizeGatewayPath(pathname = '') {
396
+ const normalized = String(pathname || '').trim();
397
+ if (!normalized) return '/';
398
+ return normalized.replace(/\/+$/, '') || '/';
399
+ }
400
+
372
401
  function isResponsesPath(pathname) {
373
- return pathname === '/v1/responses' || pathname === '/responses';
402
+ const normalized = normalizeGatewayPath(pathname);
403
+ return normalized.endsWith('/v1/responses') || normalized.endsWith('/responses');
374
404
  }
375
405
 
376
406
  function isChatCompletionsPath(pathname) {
377
- return pathname === '/v1/chat/completions' || pathname === '/chat/completions';
407
+ const normalized = normalizeGatewayPath(pathname);
408
+ return normalized.endsWith('/v1/chat/completions') || normalized.endsWith('/chat/completions');
378
409
  }
379
410
 
380
411
  function collectPreferredProbeModels(channel) {
@@ -461,10 +492,192 @@ function extractText(value) {
461
492
  return fragments.join('\n').trim();
462
493
  }
463
494
 
495
/**
 * Parse a base64 `data:` URL into its media type and base64 payload.
 *
 * Fix: the previous pattern required `;base64` to immediately follow the
 * media type, rejecting RFC 2397 URLs with parameters such as
 * `data:text/plain;charset=utf-8;base64,...`. The pattern now skips any
 * number of `;param` segments before the `;base64` marker; plain
 * `data:<type>;base64,` and `data:;base64,` inputs parse exactly as before.
 *
 * @param {string} dataUrl - Candidate data URL.
 * @returns {{ mediaType: string, data: string } | null} Parsed parts, or
 *   null when the input is not a base64 data URL.
 */
function parseBase64DataUrl(dataUrl = '') {
  const value = typeof dataUrl === 'string' ? dataUrl.trim() : '';
  if (!value) return null;
  // Group 1: optional media type; then optional `;key=value` parameters;
  // group 2: the base64 payload after the comma.
  const matched = value.match(/^data:([^;,]+)?(?:;[^;,]*)*;base64,(.+)$/i);
  if (!matched) return null;
  return {
    mediaType: String(matched[1] || '').trim(),
    data: String(matched[2] || '')
  };
}
505
+
506
/**
 * Infer a MIME type from a filename's extension via the shared
 * FILE_EXTENSION_MIME_TYPES table.
 *
 * @param {string} filename - Name (or path) whose extension to inspect.
 * @param {string} fallback - Returned when the extension is absent or unknown.
 * @returns {string} The mapped MIME type or the fallback.
 */
function inferMimeTypeFromFilename(filename = '', fallback = 'application/octet-stream') {
  const normalizedName = String(filename || '').trim();
  const ext = path.extname(normalizedName).toLowerCase();
  if (!ext) return fallback;
  return FILE_EXTENSION_MIME_TYPES[ext] || fallback;
}
511
+
512
/**
 * Convert an OpenAI-style image content item into a Claude image block.
 *
 * Accepts a bare URL string, `{ url }`, `{ image_url: string }`, or
 * `{ image_url: { url } }`. A base64 data URL becomes a `base64` source
 * (falling back to image/png when the declared type is not `image/*`);
 * any other URL becomes a `url` source.
 *
 * @param {*} value - OpenAI image content item.
 * @returns {object|null} Claude image block, or null when no URL is found.
 */
function normalizeOpenAiImageBlock(value) {
  let candidate = '';
  if (typeof value === 'string') {
    candidate = value;
  } else if (value && typeof value === 'object') {
    if (typeof value.url === 'string') {
      candidate = value.url;
    } else if (typeof value.image_url === 'string') {
      candidate = value.image_url;
    } else if (value.image_url && typeof value.image_url === 'object' && typeof value.image_url.url === 'string') {
      candidate = value.image_url.url;
    }
  }

  const trimmedUrl = String(candidate || '').trim();
  if (!trimmedUrl) return null;

  // Inline data URL → base64 source block.
  const matched = trimmedUrl.match(/^data:([^;,]+)?;base64,(.+)$/i);
  if (matched && matched[2]) {
    const declaredType = String(matched[1] || '').trim();
    // Non-image declared types are coerced to PNG so the block stays a valid image.
    const mediaType = declaredType.startsWith('image/') ? declaredType : 'image/png';
    return {
      type: 'image',
      source: {
        type: 'base64',
        media_type: mediaType,
        data: matched[2]
      }
    };
  }

  // Anything else is treated as a remote URL source.
  return {
    type: 'image',
    source: {
      type: 'url',
      url: trimmedUrl
    }
  };
}
552
+
553
/**
 * Convert an OpenAI-style file content item into a Claude block.
 *
 * The payload may be nested under `file` (chat completions shape) or flat
 * (responses shape). Priority:
 *   1. `file_data` → `document` block with a base64 source (data URLs are
 *      unwrapped; raw strings are assumed to already be base64);
 *   2. `file_url` / `url` → `document` block with a url source;
 *   3. `file_id` → a text marker `[input_file:<id>]` (the id cannot be
 *      fetched here);
 *   4. otherwise null.
 *
 * @param {*} value - OpenAI file content item.
 * @returns {object|null} Claude `document`/`text` block, or null.
 */
function normalizeOpenAiFileBlock(value) {
  if (!value || typeof value !== 'object') return null;

  const payload = (value.file && typeof value.file === 'object' && !Array.isArray(value.file))
    ? value.file
    : value;

  const filename = typeof payload.filename === 'string' ? payload.filename.trim() : '';
  const declaredType = typeof payload.mime_type === 'string'
    ? payload.mime_type.trim()
    : (typeof payload.media_type === 'string' ? payload.media_type.trim() : '');
  // Fall back to extension-based inference when no media type is declared.
  const mediaType = declaredType || inferMimeTypeFromFilename(filename);
  const fileData = typeof payload.file_data === 'string' ? payload.file_data.trim() : '';
  const fileUrl = typeof payload.file_url === 'string'
    ? payload.file_url.trim()
    : (typeof payload.url === 'string' ? payload.url.trim() : '');
  const fileId = typeof payload.file_id === 'string' ? payload.file_id.trim() : '';

  if (fileData) {
    const parsed = parseBase64DataUrl(fileData);
    const source = (parsed && parsed.data)
      ? {
          type: 'base64',
          media_type: parsed.mediaType || mediaType,
          data: parsed.data
        }
      : {
          type: 'base64',
          media_type: mediaType,
          data: fileData
        };
    return { type: 'document', source };
  }

  if (fileUrl) {
    return {
      type: 'document',
      source: {
        type: 'url',
        url: fileUrl
      }
    };
  }

  if (fileId) {
    return {
      type: 'text',
      text: `[input_file:${fileId}]`
    };
  }

  return null;
}
611
+
612
/**
 * Normalize one OpenAI-style content item into zero or more Claude content
 * blocks.
 *
 * Handles primitives (→ text block), arrays (flattened recursively), typed
 * objects (`text`/`input_text`/`output_text`, `image_url`/`input_image`,
 * `file`/`input_file`), already-Claude-shaped blocks (`tool_use`,
 * `tool_result`, `image`/`document` with a source — passed through
 * untouched), and untyped objects sniffed by their keys. Anything left is
 * reduced to whatever text extractText can salvage.
 *
 * @param {*} item - A single OpenAI content entry.
 * @returns {Array<object>} Claude content blocks (possibly empty).
 */
function normalizeOpenAiContentItemToClaudeBlocks(item) {
  if (item === null || item === undefined) return [];

  // Primitives become a single text block; whitespace-only yields nothing.
  if (typeof item === 'string' || typeof item === 'number' || typeof item === 'boolean') {
    const text = String(item);
    return text.trim() ? [{ type: 'text', text }] : [];
  }

  // Arrays are flattened recursively.
  if (Array.isArray(item)) {
    return item.flatMap(normalizeOpenAiContentItemToClaudeBlocks);
  }

  if (typeof item !== 'object') return [];

  const itemType = String(item.type || '').trim().toLowerCase();

  // Already-Claude-shaped blocks pass through untouched.
  if (itemType === 'tool_use' || itemType === 'tool_result') {
    return [item];
  }
  if ((itemType === 'image' || itemType === 'document') &&
      item.source && typeof item.source === 'object') {
    return [item];
  }

  if (itemType === 'text' || itemType === 'input_text' || itemType === 'output_text') {
    if (typeof item.text !== 'string' || !item.text.trim()) return [];
    const block = { type: 'text', text: item.text };
    // Preserve per-block cache breakpoints so prompt caching still hits.
    if (item.cache_control && typeof item.cache_control === 'object') {
      block.cache_control = item.cache_control;
    }
    return [block];
  }

  if (itemType === 'image_url' || itemType === 'input_image') {
    const imageBlock = normalizeOpenAiImageBlock(item);
    return imageBlock ? [imageBlock] : [];
  }

  if (itemType === 'file' || itemType === 'input_file') {
    const fileBlock = normalizeOpenAiFileBlock(item);
    return fileBlock ? [fileBlock] : [];
  }

  // Untyped objects: sniff image-like, then file-like shapes.
  if (item.image_url !== undefined || item.url !== undefined) {
    const imageBlock = normalizeOpenAiImageBlock(item);
    if (imageBlock) return [imageBlock];
  }
  if (item.file !== undefined || item.file_data !== undefined ||
      item.file_url !== undefined || item.file_id !== undefined) {
    const fileBlock = normalizeOpenAiFileBlock(item);
    if (fileBlock) return [fileBlock];
  }

  // Last resort: stringify whatever text can be extracted.
  const fallbackText = extractText(item);
  return fallbackText ? [{ type: 'text', text: fallbackText }] : [];
}
671
+
672
/**
 * Normalize a full OpenAI `content` value (string or array of items) into
 * Claude content blocks. Thin alias over the per-item normalizer, which
 * already handles arrays recursively.
 *
 * @param {*} content - OpenAI message content.
 * @returns {Array<object>} Claude content blocks.
 */
function normalizeOpenAiContentToClaudeBlocks(content) {
  const blocks = normalizeOpenAiContentItemToClaudeBlocks(content);
  return blocks;
}
675
+
464
676
/**
 * Map an OpenAI-style message role onto the internal role set.
 * `developer` is treated as `system` (OpenAI's newer alias); unknown or
 * empty roles default to `user`.
 *
 * @param {*} role - Raw role value from the request payload.
 * @returns {'assistant'|'system'|'tool'|'user'} Normalized role.
 */
function normalizeOpenAiRole(role) {
  const value = String(role || '').trim().toLowerCase();
  switch (value) {
    case 'assistant':
    case 'model':
      return 'assistant';
    case 'system':
    case 'developer':
      return 'system';
    case 'tool':
      return 'tool';
    default:
      return 'user';
  }
}
470
683
 
@@ -521,6 +734,17 @@ function normalizeToolChoiceToClaude(toolChoice) {
521
734
  return undefined;
522
735
  }
523
736
 
737
/**
 * Map an OpenAI `reasoning_effort` value onto Claude's `thinking` config.
 * `none` disables thinking, `auto` enables it without a budget, and
 * low/medium/high enable it with fixed token budgets. Unknown or empty
 * values yield undefined (no thinking field is sent).
 *
 * @param {*} reasoningEffort - Raw reasoning_effort from the request.
 * @returns {object|undefined} Claude thinking configuration.
 */
function normalizeReasoningEffortToClaude(reasoningEffort) {
  const effort = String(reasoningEffort || '').trim().toLowerCase();
  if (!effort) return undefined;
  if (effort === 'none') return { type: 'disabled' };
  if (effort === 'auto') return { type: 'enabled' };
  const budgets = new Map([
    ['low', 2048],
    ['medium', 8192],
    ['high', 24576]
  ]);
  if (budgets.has(effort)) {
    return { type: 'enabled', budget_tokens: budgets.get(effort) };
  }
  return undefined;
}
747
+
524
748
  function generateToolCallId() {
525
749
  return `toolu_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
526
750
  }
@@ -597,24 +821,41 @@ function buildUserToolResultMessage(item) {
597
821
  }
598
822
 
599
823
  function normalizeOpenCodeMessages(pathname, payload = {}) {
600
- const systemParts = [];
824
+ const systemBlocks = [];
601
825
  const messages = [];
602
826
 
603
827
  if (isResponsesPath(pathname) && typeof payload.instructions === 'string' && payload.instructions.trim()) {
604
- systemParts.push(payload.instructions.trim());
828
+ systemBlocks.push({ type: 'text', text: payload.instructions.trim() });
605
829
  }
606
830
 
607
- const appendMessage = (role, content) => {
831
+ const appendMessage = (role, content, topLevelCacheControl) => {
608
832
  const normalizedRole = normalizeOpenAiRole(role);
609
- const text = extractText(content);
610
- if (!text) return;
833
+ const contentBlocks = normalizeOpenAiContentToClaudeBlocks(content);
611
834
  if (normalizedRole === 'system') {
612
- systemParts.push(text);
835
+ const blocks = contentBlocks
836
+ .filter(block => block && block.type === 'text' && typeof block.text === 'string' && block.text.trim());
837
+ blocks.forEach((block, idx) => {
838
+ const systemBlock = { type: 'text', text: block.text };
839
+ if (block.cache_control && typeof block.cache_control === 'object') {
840
+ systemBlock.cache_control = block.cache_control;
841
+ } else if (topLevelCacheControl && typeof topLevelCacheControl === 'object' && idx === blocks.length - 1) {
842
+ // 消息顶层的 cache_control(OpenCode/Vercel AI SDK 注入方式)打在最后一个 block 上
843
+ systemBlock.cache_control = topLevelCacheControl;
844
+ }
845
+ systemBlocks.push(systemBlock);
846
+ });
613
847
  return;
614
848
  }
849
+
850
+ if (!Array.isArray(contentBlocks) || contentBlocks.length === 0) return;
851
+ // 将消息顶层的 cache_control 传递到最后一个 content block 上
852
+ if (topLevelCacheControl && typeof topLevelCacheControl === 'object' && contentBlocks.length > 0) {
853
+ const lastBlock = contentBlocks[contentBlocks.length - 1];
854
+ if (!lastBlock.cache_control) lastBlock.cache_control = topLevelCacheControl;
855
+ }
615
856
  messages.push({
616
857
  role: normalizedRole === 'assistant' ? 'assistant' : 'user',
617
- content: [{ type: 'text', text }]
858
+ content: contentBlocks
618
859
  });
619
860
  };
620
861
 
@@ -636,7 +877,7 @@ function normalizeOpenCodeMessages(pathname, payload = {}) {
636
877
  return;
637
878
  }
638
879
  if (item.type === 'message' || item.role) {
639
- appendMessage(item.role, item.content);
880
+ appendMessage(item.role, item.content, item.cache_control);
640
881
  }
641
882
  });
642
883
  }
@@ -650,11 +891,7 @@ function normalizeOpenCodeMessages(pathname, payload = {}) {
650
891
  return;
651
892
  }
652
893
  if (message.role === 'assistant' && Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
653
- const assistantContent = [];
654
- const text = extractText(message.content);
655
- if (text) {
656
- assistantContent.push({ type: 'text', text });
657
- }
894
+ const assistantContent = normalizeOpenAiContentToClaudeBlocks(message.content);
658
895
 
659
896
  message.tool_calls.forEach(toolCall => {
660
897
  if (!toolCall || typeof toolCall !== 'object') return;
@@ -679,7 +916,7 @@ function normalizeOpenCodeMessages(pathname, payload = {}) {
679
916
  }
680
917
  return;
681
918
  }
682
- appendMessage(message.role, message.content);
919
+ appendMessage(message.role, message.content, message.cache_control);
683
920
  });
684
921
  }
685
922
 
@@ -691,7 +928,7 @@ function normalizeOpenCodeMessages(pathname, payload = {}) {
691
928
  }
692
929
 
693
930
  return {
694
- system: systemParts.join('\n\n').trim(),
931
+ systemBlocks,
695
932
  messages
696
933
  };
697
934
  }
@@ -710,9 +947,61 @@ function normalizeClaudeMetadata(metadata, fallbackUserId = '') {
710
947
  return normalized;
711
948
  }
712
949
 
950
/**
 * Inject Anthropic prompt-cache breakpoints (`cache_control`) into a
 * converted Claude payload, mirroring the Anthropic AI SDK's automatic
 * caching behavior. Mutates `converted` in place.
 *
 * Strategy:
 * - Count breakpoints the upstream client (OpenCode / Vercel AI SDK, which
 *   marks the last two non-system messages itself) already injected into
 *   `messages`, including those nested in tool_result content.
 * - `system` blocks are this proxy's responsibility: when none carries a
 *   breakpoint, mark the last one.
 * - Only when NO breakpoints exist anywhere (a non-OpenCode client) apply
 *   the fallback: mark the last content block of the final two messages,
 *   matching OpenCode's own strategy.
 *
 * @param {object} converted - Claude request payload ({ system?, messages? }).
 */
function applyPromptCachingToClaudePayload(converted) {
  const EPHEMERAL = { type: 'ephemeral' };
  const systemBlocks = Array.isArray(converted.system) ? converted.system : [];
  const messageList = Array.isArray(converted.messages) ? converted.messages : [];

  // Count cache breakpoints the upstream client already placed in messages.
  let messageBreakpoints = 0;
  for (const msg of messageList) {
    if (!Array.isArray(msg.content)) continue;
    for (const block of msg.content) {
      if (block.cache_control) messageBreakpoints += 1;
      if (block.type === 'tool_result' && Array.isArray(block.content)) {
        for (const inner of block.content) {
          if (inner.cache_control) messageBreakpoints += 1;
        }
      }
    }
  }

  // Count breakpoints already present on system blocks.
  let systemBreakpoints = 0;
  for (const block of systemBlocks) {
    if (block.cache_control) systemBreakpoints += 1;
  }

  // System prompt caching is ours to manage: add a trailing breakpoint if absent.
  if (systemBreakpoints === 0 && systemBlocks.length > 0) {
    const lastSystem = systemBlocks[systemBlocks.length - 1];
    if (!lastSystem.cache_control) lastSystem.cache_control = EPHEMERAL;
  }

  // No breakpoints anywhere → the client did not handle caching; fall back
  // to marking the last two messages (aligned with OpenCode's strategy).
  if (messageBreakpoints === 0 && systemBreakpoints === 0 && messageList.length > 0) {
    for (const msg of messageList.slice(-2)) {
      if (Array.isArray(msg.content) && msg.content.length > 0) {
        const lastBlock = msg.content[msg.content.length - 1];
        if (!lastBlock.cache_control) lastBlock.cache_control = EPHEMERAL;
      }
    }
  }
}
999
+
713
1000
  function convertOpenCodePayloadToClaude(pathname, payload = {}, fallbackModel = '', options = {}) {
714
1001
  const normalized = normalizeOpenCodeMessages(pathname, payload);
715
1002
  const maxTokens = Number(payload.max_output_tokens ?? payload.max_tokens);
1003
+ const stopSequences = normalizeStopSequences(payload.stop);
1004
+ const thinking = normalizeReasoningEffortToClaude(payload.reasoning_effort);
716
1005
 
717
1006
  const converted = {
718
1007
  model: payload.model || fallbackModel || 'claude-sonnet-4-20250514',
@@ -721,14 +1010,10 @@ function convertOpenCodePayloadToClaude(pathname, payload = {}, fallbackModel =
721
1010
  messages: normalized.messages
722
1011
  };
723
1012
 
724
- if (normalized.system) {
1013
+ if (normalized.systemBlocks && normalized.systemBlocks.length > 0) {
725
1014
  // 部分 relay 仅接受 Claude system 的 block 数组格式,不接受纯字符串
726
- converted.system = [
727
- {
728
- type: 'text',
729
- text: normalized.system
730
- }
731
- ];
1015
+ // 保留原始 cache_control 字段,确保 prompt cache 正常命中
1016
+ converted.system = normalized.systemBlocks;
732
1017
  }
733
1018
 
734
1019
  const tools = normalizeOpenAiToolsToClaude(payload.tools || []);
@@ -740,6 +1025,12 @@ function convertOpenCodePayloadToClaude(pathname, payload = {}, fallbackModel =
740
1025
  if (toolChoice) {
741
1026
  converted.tool_choice = toolChoice;
742
1027
  }
1028
+ if (stopSequences) {
1029
+ converted.stop_sequences = stopSequences;
1030
+ }
1031
+ if (thinking) {
1032
+ converted.thinking = thinking;
1033
+ }
743
1034
 
744
1035
  if (Number.isFinite(Number(payload.temperature))) {
745
1036
  converted.temperature = Number(payload.temperature);
@@ -754,6 +1045,9 @@ function convertOpenCodePayloadToClaude(pathname, payload = {}, fallbackModel =
754
1045
  // 某些 Claude relay 会校验 metadata.user_id 以识别 Claude Code 请求
755
1046
  converted.metadata = normalizeClaudeMetadata(payload.metadata, options.sessionUserId);
756
1047
 
1048
+ // 注入 prompt cache 断点,对齐 Anthropic AI SDK 的自动缓存行为
1049
+ applyPromptCachingToClaudePayload(converted);
1050
+
757
1051
  return converted;
758
1052
  }
759
1053
 
@@ -761,6 +1055,12 @@ function normalizeOpenAiToolsToGemini(tools = []) {
761
1055
  if (!Array.isArray(tools)) return [];
762
1056
 
763
1057
  const functionDeclarations = [];
1058
+ const builtInTools = [];
1059
+ const appendBuiltInTool = (toolNode) => {
1060
+ if (!toolNode || typeof toolNode !== 'object') return;
1061
+ builtInTools.push(toolNode);
1062
+ };
1063
+
764
1064
  for (const tool of tools) {
765
1065
  if (!tool || typeof tool !== 'object') continue;
766
1066
 
@@ -781,11 +1081,56 @@ function normalizeOpenAiToolsToGemini(tools = []) {
781
1081
  description: tool.description || '',
782
1082
  parameters: tool.parameters || { type: 'object', properties: {} }
783
1083
  });
1084
+ continue;
1085
+ }
1086
+
1087
+ const normalizedType = String(tool.type || '').trim().toLowerCase();
1088
+
1089
+ if (tool.google_search && typeof tool.google_search === 'object') {
1090
+ appendBuiltInTool({ googleSearch: tool.google_search });
1091
+ continue;
1092
+ }
1093
+ if (tool.code_execution && typeof tool.code_execution === 'object') {
1094
+ appendBuiltInTool({ codeExecution: tool.code_execution });
1095
+ continue;
1096
+ }
1097
+ if (tool.url_context && typeof tool.url_context === 'object') {
1098
+ appendBuiltInTool({ urlContext: tool.url_context });
1099
+ continue;
1100
+ }
1101
+
1102
+ if (normalizedType === 'google_search' || normalizedType === 'web_search' || normalizedType === 'web_search_preview') {
1103
+ const searchConfig = (tool.web_search && typeof tool.web_search === 'object')
1104
+ ? tool.web_search
1105
+ : ((tool.googleSearch && typeof tool.googleSearch === 'object') ? tool.googleSearch : {});
1106
+ appendBuiltInTool({ googleSearch: searchConfig });
1107
+ continue;
1108
+ }
1109
+
1110
+ if (normalizedType === 'code_execution' || normalizedType === 'code_interpreter') {
1111
+ const executionConfig = (tool.codeExecution && typeof tool.codeExecution === 'object')
1112
+ ? tool.codeExecution
1113
+ : {};
1114
+ appendBuiltInTool({ codeExecution: executionConfig });
1115
+ continue;
1116
+ }
1117
+
1118
+ if (normalizedType === 'url_context') {
1119
+ const urlContextConfig = (tool.urlContext && typeof tool.urlContext === 'object')
1120
+ ? tool.urlContext
1121
+ : {};
1122
+ appendBuiltInTool({ urlContext: urlContextConfig });
784
1123
  }
785
1124
  }
786
1125
 
787
- if (functionDeclarations.length === 0) return [];
788
- return [{ functionDeclarations }];
1126
+ const normalizedTools = [];
1127
+ if (functionDeclarations.length > 0) {
1128
+ normalizedTools.push({ functionDeclarations });
1129
+ }
1130
+ if (builtInTools.length > 0) {
1131
+ normalizedTools.push(...builtInTools);
1132
+ }
1133
+ return normalizedTools;
789
1134
  }
790
1135
 
791
1136
  function normalizeToolChoiceToGemini(toolChoice) {
@@ -828,6 +1173,44 @@ function normalizeToolChoiceToGemini(toolChoice) {
828
1173
  return undefined;
829
1174
  }
830
1175
 
1176
/**
 * Map an OpenAI `reasoning_effort` value onto a Gemini thinkingConfig.
 * `none` disables thoughts with a zero budget; `auto` enables thoughts
 * with a model-chosen budget (-1); low/medium/high map to thinkingLevel.
 * Unknown or empty values yield undefined (no thinkingConfig is sent).
 *
 * @param {*} reasoningEffort - Raw reasoning_effort from the request.
 * @returns {object|undefined} Gemini thinkingConfig.
 */
function normalizeReasoningEffortToGemini(reasoningEffort) {
  const effort = String(reasoningEffort || '').trim().toLowerCase();
  if (!effort) return undefined;
  switch (effort) {
    case 'none':
      // Disable thinking entirely.
      return { includeThoughts: false, thinkingBudget: 0 };
    case 'auto':
      // -1 lets the model choose its own thinking budget.
      return { includeThoughts: true, thinkingBudget: -1 };
    case 'low':
    case 'medium':
    case 'high':
      return { includeThoughts: true, thinkingLevel: effort };
    default:
      return undefined;
  }
}
+
1200
+ function normalizeGeminiResponseModalities(modalities) {
1201
+ if (!Array.isArray(modalities)) return undefined;
1202
+ const mapped = modalities
1203
+ .map(item => String(item || '').trim().toLowerCase())
1204
+ .filter(Boolean)
1205
+ .map(item => {
1206
+ if (item === 'text') return 'TEXT';
1207
+ if (item === 'image') return 'IMAGE';
1208
+ return '';
1209
+ })
1210
+ .filter(Boolean);
1211
+ return mapped.length > 0 ? mapped : undefined;
1212
+ }
1213
+
831
1214
  function normalizeStopSequences(stopValue) {
832
1215
  if (!stopValue) return undefined;
833
1216
  if (typeof stopValue === 'string' && stopValue.trim()) {
@@ -863,6 +1246,42 @@ function normalizeGeminiFunctionResponsePayload(value) {
863
1246
  return { content: normalizeToolResultContent(value) };
864
1247
  }
865
1248
 
1249
/**
 * Trim a candidate media-type string; return `fallback` when the value is
 * not a string or trims to empty.
 *
 * @param {*} value - Candidate media type.
 * @param {string} fallback - Default media type.
 * @returns {string} Normalized media type.
 */
function normalizeGeminiMediaType(value, fallback = 'application/octet-stream') {
  if (typeof value !== 'string') return fallback;
  const trimmed = value.trim();
  return trimmed === '' ? fallback : trimmed;
}
1253
+
1254
/**
 * Convert a Claude image/document block into a Gemini content part.
 * base64 sources become `inlineData`; url sources become `fileData`.
 * Image blocks default to image/png, document blocks to a generic binary
 * type when no media type is declared. Unrecognized shapes yield null.
 *
 * @param {object} block - Claude block with `{ type, source }`.
 * @returns {object|null} Gemini part ({ inlineData } or { fileData }).
 */
function buildGeminiPartFromClaudeMediaBlock(block) {
  if (!block || typeof block !== 'object') return null;
  const source = (block.source && typeof block.source === 'object') ? block.source : null;
  if (!source) return null;

  const blockType = String(block.type || '').trim().toLowerCase();
  const defaultMimeType = blockType === 'image' ? 'image/png' : 'application/octet-stream';
  const mimeType = normalizeGeminiMediaType(source.media_type || source.mime_type, defaultMimeType);
  const sourceType = String(source.type || '').trim().toLowerCase();

  switch (sourceType) {
    case 'base64':
      if (typeof source.data === 'string' && source.data.trim()) {
        return {
          inlineData: {
            mimeType,
            data: source.data
          }
        };
      }
      return null;
    case 'url':
      if (typeof source.url === 'string' && source.url.trim()) {
        return {
          fileData: {
            mimeType,
            fileUri: source.url.trim()
          }
        };
      }
      return null;
    default:
      return null;
  }
}
1284
+
866
1285
  function buildGeminiContents(messages = []) {
867
1286
  const contents = [];
868
1287
  const toolNameById = new Map();
@@ -917,6 +1336,14 @@ function buildGeminiContents(messages = []) {
917
1336
  continue;
918
1337
  }
919
1338
 
1339
+ if (block.type === 'image' || block.type === 'document') {
1340
+ const mediaPart = buildGeminiPartFromClaudeMediaBlock(block);
1341
+ if (mediaPart) {
1342
+ parts.push(mediaPart);
1343
+ continue;
1344
+ }
1345
+ }
1346
+
920
1347
  const text = extractText(block);
921
1348
  if (text) {
922
1349
  parts.push({ text });
@@ -1014,14 +1441,20 @@ function convertOpenCodePayloadToGemini(pathname, payload = {}, fallbackModel =
1014
1441
  const stopSequences = normalizeStopSequences(payload.stop);
1015
1442
  const tools = normalizeOpenAiToolsToGemini(payload.tools || []);
1016
1443
  const toolConfig = normalizeToolChoiceToGemini(payload.tool_choice);
1444
+ const thinkingConfig = normalizeReasoningEffortToGemini(payload.reasoning_effort);
1445
+ const candidateCount = Number(payload.n);
1446
+ const responseModalities = normalizeGeminiResponseModalities(payload.modalities);
1447
+ const imageConfig = (payload.image_config && typeof payload.image_config === 'object' && !Array.isArray(payload.image_config))
1448
+ ? payload.image_config
1449
+ : null;
1017
1450
 
1018
1451
  const requestBody = {
1019
1452
  contents: buildGeminiContents(normalized.messages)
1020
1453
  };
1021
1454
 
1022
- if (normalized.system) {
1455
+ if (normalized.systemBlocks && normalized.systemBlocks.length > 0) {
1023
1456
  requestBody.systemInstruction = {
1024
- parts: [{ text: normalized.system }]
1457
+ parts: normalized.systemBlocks.map(block => ({ text: block.text || '' })).filter(p => p.text)
1025
1458
  };
1026
1459
  }
1027
1460
 
@@ -1041,6 +1474,27 @@ function convertOpenCodePayloadToGemini(pathname, payload = {}, fallbackModel =
1041
1474
  if (stopSequences) {
1042
1475
  generationConfig.stopSequences = stopSequences;
1043
1476
  }
1477
+ if (thinkingConfig) {
1478
+ generationConfig.thinkingConfig = thinkingConfig;
1479
+ }
1480
+ if (Number.isFinite(candidateCount) && candidateCount > 1) {
1481
+ generationConfig.candidateCount = Math.round(candidateCount);
1482
+ }
1483
+ if (responseModalities) {
1484
+ generationConfig.responseModalities = responseModalities;
1485
+ }
1486
+ if (imageConfig) {
1487
+ const mappedImageConfig = {};
1488
+ if (typeof imageConfig.aspect_ratio === 'string' && imageConfig.aspect_ratio.trim()) {
1489
+ mappedImageConfig.aspectRatio = imageConfig.aspect_ratio.trim();
1490
+ }
1491
+ if (typeof imageConfig.image_size === 'string' && imageConfig.image_size.trim()) {
1492
+ mappedImageConfig.imageSize = imageConfig.image_size.trim();
1493
+ }
1494
+ if (Object.keys(mappedImageConfig).length > 0) {
1495
+ generationConfig.imageConfig = mappedImageConfig;
1496
+ }
1497
+ }
1044
1498
  if (Object.keys(generationConfig).length > 0) {
1045
1499
  requestBody.generationConfig = generationConfig;
1046
1500
  }
@@ -1314,12 +1768,22 @@ function extractClaudeResponseContent(claudeResponse = {}) {
1314
1768
  const textFragments = [];
1315
1769
  const functionCalls = [];
1316
1770
  const reasoningItems = [];
1317
-
1318
- if (!Array.isArray(claudeResponse.content)) {
1771
+ const nestedResponse = claudeResponse?.response && typeof claudeResponse.response === 'object'
1772
+ ? claudeResponse.response
1773
+ : null;
1774
+ const contentBlocks = Array.isArray(claudeResponse.content)
1775
+ ? claudeResponse.content
1776
+ : (Array.isArray(nestedResponse?.content) ? nestedResponse.content : null);
1777
+
1778
+ if (!Array.isArray(contentBlocks)) {
1779
+ const messageContent = claudeResponse?.choices?.[0]?.message?.content;
1780
+ if (typeof messageContent === 'string' && messageContent.trim()) {
1781
+ return { text: messageContent.trim(), functionCalls: [], reasoningItems: [] };
1782
+ }
1319
1783
  return { text: '', functionCalls: [], reasoningItems: [] };
1320
1784
  }
1321
1785
 
1322
- claudeResponse.content.forEach(block => {
1786
+ contentBlocks.forEach(block => {
1323
1787
  if (!block || typeof block !== 'object') return;
1324
1788
 
1325
1789
  if (typeof block.text === 'string' && block.text.trim()) {
@@ -1357,6 +1821,109 @@ function extractClaudeResponseContent(claudeResponse = {}) {
1357
1821
  };
1358
1822
  }
1359
1823
 
1824
/**
 * Coerce an arbitrary value to a number, falling back to 0 whenever the
 * coercion does not produce a finite number (NaN, Infinity, undefined, ...).
 */
function toNumberOrZero(value) {
  const coerced = Number(value);
  if (!Number.isFinite(coerced)) {
    return 0;
  }
  return coerced;
}
1828
+
1829
/**
 * Return the first entry of `values` that coerces to a finite number, or
 * `null` when none does.
 *
 * Explicit "absent" markers are skipped rather than coerced: JSON usage
 * payloads frequently carry `null` (and occasionally an empty string) for
 * token fields, and `Number(null) === 0` / `Number('') === 0` would
 * otherwise short-circuit the scan and mask a valid count in a later
 * candidate.
 *
 * @param {Array<*>} [values] - candidate values, highest priority first.
 * @returns {number|null} first finite numeric value, or null when absent.
 */
function pickFirstFiniteNumber(values = []) {
  for (const value of values) {
    // null/undefined mean "field not present" — keep scanning.
    if (value === null || value === undefined) continue;
    // A blank string is an absent value, not the number 0.
    if (typeof value === 'string' && value.trim() === '') continue;
    const num = Number(value);
    if (Number.isFinite(num)) return num;
  }
  return null;
}
1836
+
1837
/**
 * Collect normalized token usage from a Claude-style response, tolerating
 * several envelope shapes (top-level fields, a nested `response` object, a
 * nested `message` object) and both snake_case and camelCase field names.
 *
 * Cache-creation tokens prefer provider metadata (anthropic / venice /
 * bedrock) over plain usage fields; every returned counter is a finite
 * number, defaulting to 0 when no candidate supplied it.
 *
 * @param {object} [claudeResponse] - upstream response in any supported shape.
 * @returns {{inputTokens: number, outputTokens: number, totalTokens: number,
 *   cacheReadTokens: number, cacheCreationTokens: number, reasoningTokens: number}}
 */
function extractClaudeLikeUsage(claudeResponse = {}) {
  const asObject = (value) => (value && typeof value === 'object') ? value : {};
  const nestedResponse = asObject(claudeResponse?.response);
  const messageObject = asObject(claudeResponse?.message);

  // Usage objects in priority order: top level, nested response, nested message.
  const usageCandidates = [
    claudeResponse?.usage,
    nestedResponse.usage,
    messageObject.usage
  ].filter(candidate => candidate && typeof candidate === 'object');

  // Metadata objects that may carry provider-specific cache counters.
  const metadataCandidates = [
    claudeResponse?.providerMetadata,
    nestedResponse.providerMetadata,
    claudeResponse?.metadata,
    nestedResponse.metadata
  ].filter(candidate => candidate && typeof candidate === 'object');

  // Scan every usage candidate (in order) for the first finite value among
  // the aliases produced by `select`.
  const pickFromUsage = (select) => pickFirstFiniteNumber(usageCandidates.flatMap(select));
  const pickFromMetadata = (select) => pickFirstFiniteNumber(metadataCandidates.flatMap(select));

  const inputTokens = pickFromUsage(u => [
    u.input_tokens,
    u.prompt_tokens,
    u.inputTokens,
    u.promptTokens
  ]);

  const outputTokens = pickFromUsage(u => [
    u.output_tokens,
    u.completion_tokens,
    u.outputTokens,
    u.completionTokens
  ]);

  const totalTokens = pickFromUsage(u => [
    u.total_tokens,
    u.totalTokens
  ]);

  const cacheReadTokens = pickFromUsage(u => [
    u.cache_read_input_tokens,
    u.cacheReadInputTokens,
    u.input_tokens_details?.cached_tokens,
    u.prompt_tokens_details?.cached_tokens
  ]);

  const cacheCreationFromUsage = pickFromUsage(u => [
    u.cache_creation_input_tokens,
    u.cacheCreationInputTokens
  ]);
  const cacheCreationFromMetadata = pickFromMetadata(m => [
    m?.anthropic?.cacheCreationInputTokens,
    m?.venice?.usage?.cacheCreationInputTokens,
    m?.bedrock?.usage?.cacheWriteInputTokens
  ]);

  const reasoningTokens = pickFromUsage(u => [
    u.output_tokens_details?.reasoning_tokens,
    u.completion_tokens_details?.reasoning_tokens,
    u.reasoning_tokens,
    u.reasoningTokens
  ]);

  return {
    inputTokens: toNumberOrZero(inputTokens),
    outputTokens: toNumberOrZero(outputTokens),
    totalTokens: toNumberOrZero(totalTokens),
    cacheReadTokens: toNumberOrZero(cacheReadTokens),
    cacheCreationTokens: toNumberOrZero(
      cacheCreationFromMetadata !== null ? cacheCreationFromMetadata : cacheCreationFromUsage
    ),
    reasoningTokens: toNumberOrZero(reasoningTokens)
  };
}
1926
+
1360
1927
  function extractClaudeResponseText(claudeResponse = {}) {
1361
1928
  return extractClaudeResponseContent(claudeResponse).text;
1362
1929
  }
@@ -1460,13 +2027,17 @@ function mapGeminiFinishReasonToChatFinishReason(finishReason, hasToolCalls = fa
1460
2027
  }
1461
2028
 
1462
2029
  function buildOpenAiResponsesObject(claudeResponse = {}, fallbackModel = '') {
1463
- const inputTokens = Number(claudeResponse?.usage?.input_tokens || 0);
1464
- const outputTokens = Number(claudeResponse?.usage?.output_tokens || 0);
1465
- const totalTokens = Number(claudeResponse?.usage?.total_tokens || (inputTokens + outputTokens));
2030
+ const usage = extractClaudeLikeUsage(claudeResponse);
2031
+ const inputTokens = usage.inputTokens;
2032
+ const outputTokens = usage.outputTokens;
2033
+ const totalTokens = usage.totalTokens > 0 ? usage.totalTokens : (inputTokens + outputTokens);
2034
+ const cacheCreationTokens = usage.cacheCreationTokens;
2035
+ const cacheReadTokens = usage.cacheReadTokens;
1466
2036
  const parsedContent = extractClaudeResponseContent(claudeResponse);
1467
2037
  const text = parsedContent.text;
1468
- const reasoningTokens = parsedContent.reasoningItems.reduce((acc, item) => acc + Math.floor((item.text || '').length / 4), 0);
1469
- const model = claudeResponse.model || fallbackModel || '';
2038
+ const estimatedReasoningTokens = parsedContent.reasoningItems.reduce((acc, item) => acc + Math.floor((item.text || '').length / 4), 0);
2039
+ const reasoningTokens = usage.reasoningTokens > 0 ? usage.reasoningTokens : estimatedReasoningTokens;
2040
+ const model = claudeResponse.model || claudeResponse?.response?.model || fallbackModel || '';
1470
2041
  const responseId = `resp_${String(claudeResponse.id || Date.now()).replace(/[^a-zA-Z0-9_]/g, '')}`;
1471
2042
  const messageId = claudeResponse.id || `msg_${Date.now()}`;
1472
2043
  const createdAt = Math.floor(Date.now() / 1000);
@@ -1512,7 +2083,7 @@ function buildOpenAiResponsesObject(claudeResponse = {}, fallbackModel = '') {
1512
2083
  });
1513
2084
  });
1514
2085
 
1515
- return {
2086
+ const responseObject = {
1516
2087
  id: responseId,
1517
2088
  object: 'response',
1518
2089
  created_at: createdAt,
@@ -1523,9 +2094,21 @@ function buildOpenAiResponsesObject(claudeResponse = {}, fallbackModel = '') {
1523
2094
  input_tokens: inputTokens,
1524
2095
  output_tokens: outputTokens,
1525
2096
  total_tokens: totalTokens,
2097
+ ...(cacheReadTokens > 0 ? { input_tokens_details: { cached_tokens: cacheReadTokens } } : {}),
1526
2098
  ...(reasoningTokens > 0 ? { output_tokens_details: { reasoning_tokens: reasoningTokens } } : {})
1527
2099
  }
1528
2100
  };
2101
+
2102
+ if (cacheCreationTokens > 0 || cacheReadTokens > 0) {
2103
+ responseObject.providerMetadata = {
2104
+ anthropic: {
2105
+ ...(cacheCreationTokens > 0 ? { cacheCreationInputTokens: cacheCreationTokens } : {}),
2106
+ ...(cacheReadTokens > 0 ? { cacheReadInputTokens: cacheReadTokens } : {})
2107
+ }
2108
+ };
2109
+ }
2110
+
2111
+ return responseObject;
1529
2112
  }
1530
2113
 
1531
2114
  function buildOpenAiResponsesObjectFromGemini(geminiResponse = {}, fallbackModel = '') {
@@ -1599,12 +2182,16 @@ function buildOpenAiResponsesObjectFromGemini(geminiResponse = {}, fallbackModel
1599
2182
  }
1600
2183
 
1601
2184
  function buildOpenAiChatCompletionsObject(claudeResponse = {}, fallbackModel = '') {
1602
- const inputTokens = Number(claudeResponse?.usage?.input_tokens || 0);
1603
- const outputTokens = Number(claudeResponse?.usage?.output_tokens || 0);
1604
- const totalTokens = Number(claudeResponse?.usage?.total_tokens || (inputTokens + outputTokens));
2185
+ const usage = extractClaudeLikeUsage(claudeResponse);
2186
+ const inputTokens = usage.inputTokens;
2187
+ const outputTokens = usage.outputTokens;
2188
+ const totalTokens = usage.totalTokens > 0 ? usage.totalTokens : (inputTokens + outputTokens);
2189
+ const cachedTokens = usage.cacheReadTokens;
1605
2190
  const parsedContent = extractClaudeResponseContent(claudeResponse);
2191
+ const estimatedReasoningTokens = parsedContent.reasoningItems.reduce((acc, item) => acc + Math.floor((item.text || '').length / 4), 0);
2192
+ const reasoningTokens = usage.reasoningTokens > 0 ? usage.reasoningTokens : estimatedReasoningTokens;
1606
2193
  const text = parsedContent.text;
1607
- const model = claudeResponse.model || fallbackModel || '';
2194
+ const model = claudeResponse.model || claudeResponse?.response?.model || fallbackModel || '';
1608
2195
  const chatId = `chatcmpl_${String(claudeResponse.id || Date.now()).replace(/[^a-zA-Z0-9_]/g, '')}`;
1609
2196
  const created = Math.floor(Date.now() / 1000);
1610
2197
  const hasToolCalls = parsedContent.functionCalls.length > 0;
@@ -1639,7 +2226,9 @@ function buildOpenAiChatCompletionsObject(claudeResponse = {}, fallbackModel = '
1639
2226
  usage: {
1640
2227
  prompt_tokens: inputTokens,
1641
2228
  completion_tokens: outputTokens,
1642
- total_tokens: totalTokens
2229
+ total_tokens: totalTokens,
2230
+ ...(cachedTokens > 0 ? { prompt_tokens_details: { cached_tokens: cachedTokens } } : {}),
2231
+ ...(reasoningTokens > 0 ? { completion_tokens_details: { reasoning_tokens: reasoningTokens } } : {})
1643
2232
  }
1644
2233
  };
1645
2234
  }
@@ -1648,6 +2237,9 @@ function buildOpenAiChatCompletionsObjectFromGemini(geminiResponse = {}, fallbac
1648
2237
  const usage = extractGeminiUsage(geminiResponse);
1649
2238
  const parsedContent = extractGeminiResponseContent(geminiResponse);
1650
2239
  const text = parsedContent.text;
2240
+ const reasoningTokens = usage.reasoningTokens > 0
2241
+ ? usage.reasoningTokens
2242
+ : parsedContent.reasoningItems.reduce((acc, item) => acc + Math.floor((item.text || '').length / 4), 0);
1651
2243
  const model = geminiResponse.modelVersion || fallbackModel || '';
1652
2244
  const chatId = `chatcmpl_${Date.now()}`;
1653
2245
  const created = Math.floor(Date.now() / 1000);
@@ -1686,7 +2278,9 @@ function buildOpenAiChatCompletionsObjectFromGemini(geminiResponse = {}, fallbac
1686
2278
  usage: {
1687
2279
  prompt_tokens: usage.inputTokens,
1688
2280
  completion_tokens: usage.outputTokens,
1689
- total_tokens: usage.totalTokens
2281
+ total_tokens: usage.totalTokens,
2282
+ ...(usage.cachedTokens > 0 ? { prompt_tokens_details: { cached_tokens: usage.cachedTokens } } : {}),
2283
+ ...(reasoningTokens > 0 ? { completion_tokens_details: { reasoning_tokens: reasoningTokens } } : {})
1690
2284
  }
1691
2285
  };
1692
2286
  }
@@ -1702,11 +2296,31 @@ function sendOpenAiStyleError(res, statusCode, message, type = 'invalid_request_
1702
2296
  }
1703
2297
 
1704
2298
  function publishOpenCodeUsageLog({ requestId, channel, model, usage, startTime }) {
1705
- const inputTokens = Number(usage?.input_tokens || usage?.prompt_tokens || 0);
1706
- const outputTokens = Number(usage?.output_tokens || usage?.completion_tokens || 0);
1707
- const totalTokens = Number(usage?.total_tokens || (inputTokens + outputTokens));
1708
- const cachedTokens = Number(usage?.input_tokens_details?.cached_tokens || 0);
1709
- const reasoningTokens = Number(usage?.output_tokens_details?.reasoning_tokens || 0);
2299
+ // 兼容多种 usage 格式:
2300
+ // - 标准 OpenAI/Anthropic 格式: {input_tokens, output_tokens} {prompt_tokens, completion_tokens}
2301
+ // - 网关内部格式 (relayChatCompletionsStream 等返回): {input, output, cacheCreation, cacheRead}
2302
+ const inputTokens = Number(usage?.input_tokens || usage?.prompt_tokens || usage?.input || 0);
2303
+ const outputTokens = Number(usage?.output_tokens || usage?.completion_tokens || usage?.output || 0);
2304
+ const totalTokens = Number(usage?.total_tokens || usage?.total || (inputTokens + outputTokens));
2305
+ const cacheReadTokens = Number(
2306
+ usage?.input_tokens_details?.cached_tokens
2307
+ || usage?.prompt_tokens_details?.cached_tokens
2308
+ || usage?.providerMetadata?.anthropic?.cacheReadInputTokens
2309
+ || usage?.cacheRead
2310
+ || 0
2311
+ );
2312
+ const cacheCreationTokens = Number(
2313
+ usage?.providerMetadata?.anthropic?.cacheCreationInputTokens
2314
+ || usage?.cacheCreation
2315
+ || 0
2316
+ );
2317
+ const cachedTokens = cacheReadTokens + cacheCreationTokens;
2318
+ const reasoningTokens = Number(
2319
+ usage?.output_tokens_details?.reasoning_tokens
2320
+ || usage?.completion_tokens_details?.reasoning_tokens
2321
+ || usage?.reasoning
2322
+ || 0
2323
+ );
1710
2324
  const now = new Date();
1711
2325
  const time = now.toLocaleTimeString('zh-CN', {
1712
2326
  hour12: false,
@@ -1718,7 +2332,9 @@ function publishOpenCodeUsageLog({ requestId, channel, model, usage, startTime }
1718
2332
  const tokens = {
1719
2333
  input: inputTokens,
1720
2334
  output: outputTokens,
1721
- total: totalTokens
2335
+ total: totalTokens,
2336
+ cacheRead: cacheReadTokens,
2337
+ cacheCreation: cacheCreationTokens
1722
2338
  };
1723
2339
  const cost = calculateCost(model || '', tokens);
1724
2340
 
@@ -1824,10 +2440,57 @@ function sendResponsesSse(res, responseObject) {
1824
2440
  res.end();
1825
2441
  }
1826
2442
 
2443
/**
 * Normalize streaming chat-completions `tool_calls` entries into the
 * canonical OpenAI delta shape: every entry carries a non-negative integer
 * `index`, a non-empty `id`, `type: 'function'`, and a string
 * `function.arguments`. Non-object entries are dropped; a missing or invalid
 * `index` falls back to the entry's sequential position among the kept calls.
 *
 * @param {Array<object>} [toolCalls] - raw tool_calls from a message object.
 * @returns {Array<object>} normalized tool_call deltas.
 */
function normalizeChatCompletionsDeltaToolCalls(toolCalls = []) {
  if (!Array.isArray(toolCalls)) return [];

  // Accept non-negative integers and all-digit strings; anything else falls
  // back to the sequential position.
  const parseIndex = (candidate, defaultIndex) => {
    if (typeof candidate === 'number' && Number.isInteger(candidate) && candidate >= 0) {
      return candidate;
    }
    if (typeof candidate === 'string') {
      const digits = candidate.trim();
      if (/^\d+$/.test(digits)) return Number(digits);
    }
    return defaultIndex;
  };

  const result = [];
  let sequentialIndex = 0;

  for (const entry of toolCalls) {
    if (!entry || typeof entry !== 'object') continue;

    const fn = (entry.function && typeof entry.function === 'object') ? entry.function : {};
    // Prefer function.name; fall back to a top-level name when present.
    const name = typeof fn.name === 'string'
      ? fn.name
      : (typeof entry.name === 'string' ? entry.name : '');
    // Arguments may live on the function object or the entry itself.
    const rawArgs = Object.prototype.hasOwnProperty.call(fn, 'arguments')
      ? fn.arguments
      : entry.arguments;
    const args = normalizeFunctionArgumentsString(
      typeof rawArgs === 'string'
        ? rawArgs
        : JSON.stringify(rawArgs && typeof rawArgs === 'object' ? rawArgs : {})
    );
    const id = (typeof entry.id === 'string' && entry.id.trim())
      ? entry.id.trim()
      : generateToolCallId();

    result.push({
      index: parseIndex(entry.index, sequentialIndex),
      id,
      type: 'function',
      function: {
        name,
        arguments: args
      }
    });
    sequentialIndex += 1;
  }

  return result;
}
+
1827
2490
  function sendChatCompletionsSse(res, responseObject) {
1828
2491
  const message = responseObject?.choices?.[0]?.message || {};
1829
2492
  const text = message?.content || '';
1830
- const toolCalls = Array.isArray(message?.tool_calls) ? message.tool_calls : [];
2493
+ const toolCalls = normalizeChatCompletionsDeltaToolCalls(message?.tool_calls);
1831
2494
  const finishReason = responseObject?.choices?.[0]?.finish_reason || 'stop';
1832
2495
 
1833
2496
  setSseHeaders(res);
@@ -1865,6 +2528,21 @@ function sendChatCompletionsSse(res, responseObject) {
1865
2528
  ]
1866
2529
  };
1867
2530
  writeSseData(res, doneChunk);
2531
+ // Match OpenAI stream_options.include_usage behavior: emit a final usage chunk.
2532
+ writeSseData(res, {
2533
+ id: responseObject.id,
2534
+ object: 'chat.completion.chunk',
2535
+ created: responseObject.created,
2536
+ model: responseObject.model,
2537
+ choices: [],
2538
+ usage: responseObject?.usage && typeof responseObject.usage === 'object'
2539
+ ? responseObject.usage
2540
+ : {
2541
+ prompt_tokens: 0,
2542
+ completion_tokens: 0,
2543
+ total_tokens: 0
2544
+ }
2545
+ });
1868
2546
  writeSseDone(res);
1869
2547
  res.end();
1870
2548
  }
@@ -1882,6 +2560,9 @@ function createClaudeResponsesStreamState(fallbackModel = '') {
1882
2560
  model: fallbackModel || '',
1883
2561
  inputTokens: 0,
1884
2562
  outputTokens: 0,
2563
+ cachedTokens: 0,
2564
+ cacheCreationTokens: 0,
2565
+ cacheReadTokens: 0,
1885
2566
  usageSeen: false,
1886
2567
  blockTypeByIndex: new Map(),
1887
2568
  messageIdByIndex: new Map(),
@@ -2001,15 +2682,30 @@ function buildCompletedResponsesObjectFromStreamState(state) {
2001
2682
  output
2002
2683
  };
2003
2684
 
2004
- if (state.usageSeen || totalTokens > 0 || reasoningTokens > 0) {
2005
- response.usage = {
2006
- input_tokens: Number(state.inputTokens || 0),
2007
- output_tokens: Number(state.outputTokens || 0),
2008
- total_tokens: totalTokens
2685
+ // 始终输出 usage 字段,确保 OpenCode Context 面板能正确读取 token 数据
2686
+ response.usage = {
2687
+ input_tokens: Number(state.inputTokens || 0),
2688
+ output_tokens: Number(state.outputTokens || 0),
2689
+ total_tokens: totalTokens
2690
+ };
2691
+ if (reasoningTokens > 0) {
2692
+ response.usage.output_tokens_details = { reasoning_tokens: reasoningTokens };
2693
+ }
2694
+ if ((state.cacheReadTokens || 0) > 0) {
2695
+ response.usage.input_tokens_details = { cached_tokens: Number(state.cacheReadTokens || 0) };
2696
+ }
2697
+ // 注入 providerMetadata.anthropic,供 OpenCode Session.getUsage() 读取 cache write/read tokens
2698
+ if ((state.cacheCreationTokens || 0) > 0 || (state.cacheReadTokens || 0) > 0) {
2699
+ response.providerMetadata = {
2700
+ anthropic: {
2701
+ ...(Number(state.cacheCreationTokens || 0) > 0
2702
+ ? { cacheCreationInputTokens: Number(state.cacheCreationTokens || 0) }
2703
+ : {}),
2704
+ ...(Number(state.cacheReadTokens || 0) > 0
2705
+ ? { cacheReadInputTokens: Number(state.cacheReadTokens || 0) }
2706
+ : {})
2707
+ }
2009
2708
  };
2010
- if (reasoningTokens > 0) {
2011
- response.usage.output_tokens_details = { reasoning_tokens: reasoningTokens };
2012
- }
2013
2709
  }
2014
2710
 
2015
2711
  return response;
@@ -2036,6 +2732,14 @@ function processClaudeResponsesSseEvent(parsed, state, res) {
2036
2732
  state.outputTokens = Number(message.usage.output_tokens);
2037
2733
  state.usageSeen = true;
2038
2734
  }
2735
+ const cacheCreation = Number(message.usage.cache_creation_input_tokens || 0);
2736
+ const cacheRead = Number(message.usage.cache_read_input_tokens || 0);
2737
+ if (Number.isFinite(cacheCreation + cacheRead) && (cacheCreation + cacheRead) > 0) {
2738
+ state.cacheCreationTokens = cacheCreation;
2739
+ state.cacheReadTokens = cacheRead;
2740
+ state.cachedTokens = cacheCreation + cacheRead;
2741
+ state.usageSeen = true;
2742
+ }
2039
2743
  }
2040
2744
 
2041
2745
  writeSseData(res, {
@@ -2345,7 +3049,7 @@ function processClaudeResponsesSseEvent(parsed, state, res) {
2345
3049
 
2346
3050
  if (type === 'message_delta') {
2347
3051
  const usage = parsed.usage && typeof parsed.usage === 'object' ? parsed.usage : {};
2348
- if (Number.isFinite(Number(usage.input_tokens))) {
3052
+ if (Number.isFinite(Number(usage.input_tokens)) && Number(usage.input_tokens) > 0) {
2349
3053
  state.inputTokens = Number(usage.input_tokens);
2350
3054
  state.usageSeen = true;
2351
3055
  }
@@ -2353,6 +3057,14 @@ function processClaudeResponsesSseEvent(parsed, state, res) {
2353
3057
  state.outputTokens = Number(usage.output_tokens);
2354
3058
  state.usageSeen = true;
2355
3059
  }
3060
+ const cacheCreation = Number(usage.cache_creation_input_tokens || 0);
3061
+ const cacheRead = Number(usage.cache_read_input_tokens || 0);
3062
+ if (Number.isFinite(cacheCreation + cacheRead) && (cacheCreation + cacheRead) > 0) {
3063
+ state.cacheCreationTokens = cacheCreation;
3064
+ state.cacheReadTokens = cacheRead;
3065
+ state.cachedTokens = cacheCreation + cacheRead;
3066
+ state.usageSeen = true;
3067
+ }
2356
3068
  return;
2357
3069
  }
2358
3070
 
@@ -2687,6 +3399,253 @@ async function collectCodexResponsesNonStream(upstreamResponse, originalPayload
2687
3399
  });
2688
3400
  }
2689
3401
 
3402
/**
 * Relay an upstream Anthropic Messages SSE stream to the client as an
 * OpenAI chat.completions SSE stream, translating events on the fly:
 * `message_start` seeds model/usage, `content_block_start`/`content_block_delta`
 * become content or tool_call chunks, `message_delta` records output tokens and
 * stop reason, and `message_stop` emits the finish chunk plus a trailing usage
 * chunk before closing the response.
 *
 * @param {object} upstreamResponse - upstream HTTP response carrying the SSE body
 *   (also watched for 'error'); assumes createDecodedStream(upstreamResponse)
 *   yields the decoded byte stream — TODO confirm.
 * @param {object} res - client HTTP response to write SSE chunks to.
 * @param {string} [fallbackModel] - model name used until the upstream reports one.
 * @returns {Promise<{model: string, usage: {input: number, output: number,
 *   cacheCreation: number, cacheRead: number}}>} resolves once the stream
 *   finishes (message_stop or stream end); rejects on stream/network errors.
 */
async function relayChatCompletionsStream(upstreamResponse, res, fallbackModel = '') {
  setSseHeaders(res);
  const stream = createDecodedStream(upstreamResponse);

  // One synthetic chat-completion identity is reused for every chunk.
  const chatId = `chatcmpl_${Date.now()}`;
  const created = Math.floor(Date.now() / 1000);

  // state tracked across SSE events
  const state = {
    model: fallbackModel || '',
    inputTokens: 0,
    outputTokens: 0,
    cacheCreationTokens: 0,
    cacheReadTokens: 0,
    stopReason: 'stop',
    // per-block tracking
    blockTypeByIndex: new Map(),
    functionCallIdByIndex: new Map(),
    functionNameByIndex: new Map(),
    functionArgsByIndex: new Map(),
    // tool_call index emitted to client (sequential, starting at 0)
    toolCallClientIndexByBlockIndex: new Map(),
    nextToolCallClientIndex: 0
  };

  return new Promise((resolve, reject) => {
    let buffer = '';
    let settled = false;

    // Guard against double settlement: message_stop resolves, but 'end' /
    // 'error' handlers may still fire afterwards.
    const safeResolve = (value) => { if (!settled) { settled = true; resolve(value); } };
    const safeReject = (error) => { if (!settled) { settled = true; reject(error); } };

    // Send the initial role chunk once
    writeSseData(res, {
      id: chatId,
      object: 'chat.completion.chunk',
      created,
      model: state.model || fallbackModel,
      choices: [{ index: 0, delta: { role: 'assistant', content: '' }, finish_reason: null }]
    });

    // Parse one SSE event block ("data:" lines up to a blank-line separator)
    // and translate it to zero or more client chunks. Malformed JSON and
    // unknown event types are ignored silently.
    const processSseBlock = (block) => {
      if (!block || !block.trim()) return;
      const dataLines = block
        .split('\n')
        .map(line => line.trimEnd())
        .filter(line => line.trim().startsWith('data:'))
        .map(line => line.replace(/^data:\s?/, ''));
      if (dataLines.length === 0) return;
      const payload = dataLines.join('\n').trim();
      if (!payload || payload === '[DONE]') return;

      let parsed;
      try { parsed = JSON.parse(payload); } catch { return; }
      if (!parsed || typeof parsed !== 'object') return;

      const type = parsed.type;
      if (!type) return;

      if (type === 'message_start') {
        // Seed model name and the input/cache token counts for the request.
        const msg = parsed.message && typeof parsed.message === 'object' ? parsed.message : {};
        if (msg.model) state.model = msg.model;
        if (msg.usage) {
          state.inputTokens = Number(msg.usage.input_tokens || 0);
          state.cacheCreationTokens = Number(msg.usage.cache_creation_input_tokens || 0);
          state.cacheReadTokens = Number(msg.usage.cache_read_input_tokens || 0);
        }
        return;
      }

      if (type === 'content_block_start') {
        const blockIndex = Number.isFinite(Number(parsed.index)) ? Number(parsed.index) : 0;
        const block = parsed.content_block && typeof parsed.content_block === 'object' ? parsed.content_block : {};
        const blockType = block.type;
        state.blockTypeByIndex.set(blockIndex, blockType);

        if (blockType === 'tool_use') {
          // Map the upstream block index to a sequential client-side
          // tool_call index, as chat.completions clients expect 0,1,2,...
          const callId = String(block.id || generateToolCallId());
          const name = block.name || '';
          state.functionCallIdByIndex.set(blockIndex, callId);
          state.functionNameByIndex.set(blockIndex, name);
          state.functionArgsByIndex.set(blockIndex, '');
          const clientIndex = state.nextToolCallClientIndex++;
          state.toolCallClientIndexByBlockIndex.set(blockIndex, clientIndex);

          // Emit tool_call start chunk
          writeSseData(res, {
            id: chatId,
            object: 'chat.completion.chunk',
            created,
            model: state.model || fallbackModel,
            choices: [{
              index: 0,
              delta: {
                tool_calls: [{
                  index: clientIndex,
                  id: callId,
                  type: 'function',
                  function: { name, arguments: '' }
                }]
              },
              finish_reason: null
            }]
          });
        }
        return;
      }

      if (type === 'content_block_delta') {
        const blockIndex = Number.isFinite(Number(parsed.index)) ? Number(parsed.index) : 0;
        const delta = parsed.delta && typeof parsed.delta === 'object' ? parsed.delta : {};
        const deltaType = delta.type;

        if (deltaType === 'text_delta') {
          // Plain assistant text: forward as a content delta.
          const text = typeof delta.text === 'string' ? delta.text : '';
          if (!text) return;
          writeSseData(res, {
            id: chatId,
            object: 'chat.completion.chunk',
            created,
            model: state.model || fallbackModel,
            choices: [{ index: 0, delta: { content: text }, finish_reason: null }]
          });
          return;
        }

        if (deltaType === 'input_json_delta') {
          // Tool argument fragment: accumulate locally and forward the raw
          // fragment under the mapped client tool_call index.
          const partialJson = typeof delta.partial_json === 'string' ? delta.partial_json : '';
          if (!partialJson) return;
          const prev = state.functionArgsByIndex.get(blockIndex) || '';
          state.functionArgsByIndex.set(blockIndex, prev + partialJson);
          const clientIndex = state.toolCallClientIndexByBlockIndex.get(blockIndex) ?? 0;
          writeSseData(res, {
            id: chatId,
            object: 'chat.completion.chunk',
            created,
            model: state.model || fallbackModel,
            choices: [{
              index: 0,
              delta: {
                tool_calls: [{
                  index: clientIndex,
                  function: { arguments: partialJson }
                }]
              },
              finish_reason: null
            }]
          });
          return;
        }
        // thinking_delta: silently skip (no equivalent in chat completions)
        return;
      }

      if (type === 'message_delta') {
        // Running output-token count and the final stop reason.
        const usage = parsed.usage && typeof parsed.usage === 'object' ? parsed.usage : {};
        if (Number.isFinite(Number(usage.output_tokens))) {
          state.outputTokens = Number(usage.output_tokens);
        }
        const stopReason = parsed.delta && parsed.delta.stop_reason;
        if (stopReason) state.stopReason = stopReason;
        return;
      }

      if (type === 'message_stop') {
        // finish_reason is forced to 'tool_calls' when any tool_use block
        // was emitted, mirroring OpenAI behavior.
        const finishReason = mapClaudeStopReasonToChatFinishReason(state.stopReason);
        const hasToolCalls = state.nextToolCallClientIndex > 0;

        // Final finish chunk
        writeSseData(res, {
          id: chatId,
          object: 'chat.completion.chunk',
          created,
          model: state.model || fallbackModel,
          choices: [{ index: 0, delta: {}, finish_reason: hasToolCalls ? 'tool_calls' : finishReason }]
        });

        // Usage chunk (stream_options.include_usage)
        const inputTokens = state.inputTokens;
        const outputTokens = state.outputTokens;
        const cachedTokens = state.cacheCreationTokens + state.cacheReadTokens;
        writeSseData(res, {
          id: chatId,
          object: 'chat.completion.chunk',
          created,
          model: state.model || fallbackModel,
          choices: [],
          usage: {
            prompt_tokens: inputTokens,
            completion_tokens: outputTokens,
            total_tokens: inputTokens + outputTokens,
            ...(cachedTokens > 0 ? { prompt_tokens_details: { cached_tokens: cachedTokens } } : {})
          }
        });

        writeSseDone(res);
        res.end();
        // Resolve with the gateway-internal usage shape consumed by the
        // caller's publishOpenCodeUsageLog.
        safeResolve({
          model: state.model || fallbackModel,
          usage: {
            input: inputTokens,
            output: outputTokens,
            cacheCreation: state.cacheCreationTokens,
            cacheRead: state.cacheReadTokens
          }
        });
      }
    };

    stream.on('data', (chunk) => {
      // Normalize CRLF and split on the blank-line SSE event separator;
      // anything after the last separator stays buffered for the next chunk.
      buffer += chunk.toString('utf8').replace(/\r\n/g, '\n');
      let separatorIndex = buffer.indexOf('\n\n');
      while (separatorIndex >= 0) {
        const block = buffer.slice(0, separatorIndex);
        buffer = buffer.slice(separatorIndex + 2);
        processSseBlock(block);
        separatorIndex = buffer.indexOf('\n\n');
      }
    });

    stream.on('end', () => {
      // Flush any trailing partial event, then close the client stream if
      // message_stop never did (e.g. upstream ended early).
      if (buffer.trim()) processSseBlock(buffer);
      if (!res.writableEnded) {
        writeSseDone(res);
        res.end();
      }
      safeResolve({ model: state.model || fallbackModel, usage: { input: state.inputTokens, output: state.outputTokens, cacheCreation: state.cacheCreationTokens, cacheRead: state.cacheReadTokens } });
    });

    stream.on('error', (error) => {
      // Terminate the client stream cleanly before propagating the failure.
      if (!res.writableEnded) {
        writeSseDone(res);
        res.end();
      }
      safeReject(error);
    });

    upstreamResponse.on('error', (error) => {
      if (!res.writableEnded) {
        writeSseDone(res);
        res.end();
      }
      safeReject(error);
    });
  });
}
3648
+
2690
3649
  async function handleClaudeGatewayRequest(req, res, channel, effectiveKey) {
2691
3650
  const pathname = getRequestPathname(req.url);
2692
3651
  if (!isResponsesPath(pathname) && !isChatCompletionsPath(pathname)) {
@@ -2703,6 +3662,7 @@ async function handleClaudeGatewayRequest(req, res, channel, effectiveKey) {
2703
3662
  const originalPayload = (req.body && typeof req.body === 'object') ? req.body : {};
2704
3663
  const wantsStream = !!originalPayload.stream;
2705
3664
  const streamResponses = wantsStream && isResponsesPath(pathname);
3665
+ const streamChatCompletions = wantsStream && isChatCompletionsPath(pathname);
2706
3666
  const sessionKey = extractSessionIdFromRequest(req, originalPayload);
2707
3667
  const sessionScope = normalizeSessionKeyValue(channel?.id || channel?.name || '');
2708
3668
  const scopedSessionKey = sessionKey && sessionScope
@@ -2713,7 +3673,7 @@ async function handleClaudeGatewayRequest(req, res, channel, effectiveKey) {
2713
3673
  const claudePayload = convertOpenCodePayloadToClaude(pathname, originalPayload, channel.model, {
2714
3674
  sessionUserId
2715
3675
  });
2716
- claudePayload.stream = streamResponses;
3676
+ claudePayload.stream = streamResponses || streamChatCompletions;
2717
3677
 
2718
3678
  const headers = {
2719
3679
  'x-api-key': effectiveKey,
@@ -2732,12 +3692,61 @@ async function handleClaudeGatewayRequest(req, res, channel, effectiveKey) {
2732
3692
  'x-stainless-os': mapStainlessOs(),
2733
3693
  'x-stainless-timeout': '600',
2734
3694
  'content-type': 'application/json',
2735
- 'accept': streamResponses ? 'text/event-stream' : 'application/json',
3695
+ 'accept': (streamResponses || streamChatCompletions) ? 'text/event-stream' : 'application/json',
2736
3696
  'accept-encoding': 'gzip, deflate, br, zstd',
2737
3697
  'connection': 'keep-alive',
2738
3698
  'user-agent': CLAUDE_CODE_USER_AGENT
2739
3699
  };
2740
3700
 
3701
+ if (streamChatCompletions) {
3702
+ let streamUpstream;
3703
+ try {
3704
+ streamUpstream = await postJsonStream(buildClaudeTargetUrl(channel.baseUrl), headers, claudePayload, 120000);
3705
+ } catch (error) {
3706
+ recordFailure(channel.id, 'opencode', error);
3707
+ sendOpenAiStyleError(res, 502, `Claude gateway network error: ${error.message}`, 'proxy_error');
3708
+ return true;
3709
+ }
3710
+
3711
+ const statusCode = Number(streamUpstream.statusCode) || 500;
3712
+ if (statusCode < 200 || statusCode >= 300) {
3713
+ let rawBody = '';
3714
+ try {
3715
+ rawBody = await collectHttpResponseBody(streamUpstream.response);
3716
+ } catch {
3717
+ rawBody = '';
3718
+ }
3719
+ let parsedError = null;
3720
+ try {
3721
+ parsedError = rawBody ? JSON.parse(rawBody) : null;
3722
+ } catch {
3723
+ parsedError = null;
3724
+ }
3725
+ const upstreamMessage = parsedError?.error?.message || parsedError?.message || rawBody || `HTTP ${statusCode}`;
3726
+ recordFailure(channel.id, 'opencode', new Error(String(upstreamMessage).slice(0, 200)));
3727
+ sendOpenAiStyleError(res, statusCode, String(upstreamMessage).slice(0, 1000), 'upstream_error');
3728
+ return true;
3729
+ }
3730
+
3731
+ try {
3732
+ const streamedResponseObject = await relayChatCompletionsStream(streamUpstream.response, res, originalPayload.model || '');
3733
+ publishOpenCodeUsageLog({
3734
+ requestId,
3735
+ channel,
3736
+ model: streamedResponseObject?.model || originalPayload.model || '',
3737
+ usage: streamedResponseObject?.usage || {},
3738
+ startTime
3739
+ });
3740
+ recordSuccess(channel.id, 'opencode');
3741
+ } catch (error) {
3742
+ recordFailure(channel.id, 'opencode', error);
3743
+ if (!res.headersSent) {
3744
+ sendOpenAiStyleError(res, 502, `Claude stream relay error: ${error.message}`, 'proxy_error');
3745
+ }
3746
+ }
3747
+ return true;
3748
+ }
3749
+
2741
3750
  if (streamResponses) {
2742
3751
  let streamUpstream;
2743
3752
  try {
@@ -2775,7 +3784,9 @@ async function handleClaudeGatewayRequest(req, res, channel, effectiveKey) {
2775
3784
  requestId,
2776
3785
  channel,
2777
3786
  model: streamedResponseObject?.model || originalPayload.model || '',
2778
- usage: streamedResponseObject?.usage || {},
3787
+ usage: streamedResponseObject?.providerMetadata
3788
+ ? { ...(streamedResponseObject.usage || {}), providerMetadata: streamedResponseObject.providerMetadata }
3789
+ : streamedResponseObject?.usage || {},
2779
3790
  startTime
2780
3791
  });
2781
3792
  recordSuccess(channel.id, 'opencode');
@@ -2882,10 +3893,11 @@ async function handleCodexGatewayRequest(req, res, channel, effectiveKey) {
2882
3893
  return true;
2883
3894
  }
2884
3895
 
2885
- const codexSessionId = `${Date.now()}-${Math.random().toString(36).slice(2, 15)}`;
3896
+ const codexSessionId = extractSessionIdFromRequest(req, originalPayload);
3897
+ const stableSessionKey = codexSessionId || `${channel.id || 'ch'}-${channel.baseUrl || ''}`;
2886
3898
  const promptCacheKey = (typeof converted.requestBody.prompt_cache_key === 'string' && converted.requestBody.prompt_cache_key.trim())
2887
3899
  ? converted.requestBody.prompt_cache_key.trim()
2888
- : codexSessionId;
3900
+ : stableSessionKey;
2889
3901
  converted.requestBody.prompt_cache_key = promptCacheKey;
2890
3902
 
2891
3903
  const headers = {
@@ -3814,21 +4826,36 @@ async function collectProxyModelList(channels = [], options = {}) {
3814
4826
  };
3815
4827
 
3816
4828
  const forceRefresh = options.forceRefresh === true;
4829
+ const probePreferredModels = options.probePreferredModels === true;
4830
+ const useCacheOnly = options.useCacheOnly === true;
3817
4831
  // 模型列表聚合改为串行探测,避免并发触发上游会话窗口限流
3818
4832
  for (const channel of channels) {
4833
+ if (useCacheOnly) {
4834
+ const cacheEntry = getCachedModelInfo(channel?.id);
4835
+ const cachedFetched = Array.isArray(cacheEntry?.fetchedModels) ? cacheEntry.fetchedModels : [];
4836
+ const cachedAvailable = Array.isArray(cacheEntry?.availableModels) ? cacheEntry.availableModels : [];
4837
+ cachedFetched.forEach(add);
4838
+ cachedAvailable.forEach(add);
4839
+ continue;
4840
+ }
4841
+
3819
4842
  try {
3820
4843
  // eslint-disable-next-line no-await-in-loop
3821
4844
  const listResult = await fetchModelsFromProvider(channel, 'openai_compatible', { forceRefresh });
3822
4845
  const listedModels = Array.isArray(listResult?.models) ? listResult.models : [];
3823
4846
  if (listedModels.length > 0) {
3824
4847
  listedModels.forEach(add);
3825
- continue;
4848
+ // 默认沿用 /v1/models 结果;仅在显式要求时继续探测默认模型。
4849
+ if (!probePreferredModels) {
4850
+ continue;
4851
+ }
3826
4852
  }
3827
4853
 
3828
4854
  const shouldProbeByDefault = !!listResult?.disabledByConfig;
3829
4855
 
3830
- // 默认仅入口转换器渠道执行模型探测;若已禁用 /v1/models 则对全部渠道启用默认探测
3831
- if (!shouldProbeByDefault && !isConverterPresetChannel(channel)) {
4856
+ // 默认仅入口转换器渠道执行模型探测;若已禁用 /v1/models 则对全部渠道启用默认探测。
4857
+ // 当显式要求 probePreferredModels 时,无论 /v1/models 是否返回都执行默认模型探测。
4858
+ if (!probePreferredModels && !shouldProbeByDefault && !isConverterPresetChannel(channel)) {
3832
4859
  continue;
3833
4860
  }
3834
4861
 
@@ -3837,6 +4864,7 @@ async function collectProxyModelList(channels = [], options = {}) {
3837
4864
  const probe = await probeModelAvailability(channel, channelType, {
3838
4865
  forceRefresh,
3839
4866
  stopOnFirstAvailable: false,
4867
+ toolType: 'opencode',
3840
4868
  preferredModels: collectPreferredProbeModels(channel)
3841
4869
  });
3842
4870
  const available = Array.isArray(probe?.availableModels) ? probe.availableModels : [];
@@ -3906,11 +4934,23 @@ async function startOpenCodeProxyServer(options = {}) {
3906
4934
  if (!proxyReq.getHeader('content-type')) {
3907
4935
  proxyReq.setHeader('content-type', 'application/json');
3908
4936
  }
4937
+ // 禁止上游返回压缩响应,避免在 proxyRes 监听器中出现双消费者竞争
4938
+ proxyReq.removeHeader('accept-encoding');
3909
4939
 
3910
4940
  if (shouldParseJson(req) && (req.rawBody || req.body)) {
3911
- const bodyBuffer = req.rawBody
3912
- ? Buffer.isBuffer(req.rawBody) ? req.rawBody : Buffer.from(req.rawBody)
3913
- : Buffer.from(JSON.stringify(req.body));
4941
+ let body = req.body;
4942
+ // Chat Completions 流式请求注入 stream_options.include_usage = true
4943
+ // OpenCode 使用 @ai-sdk/openai-compatible,该 SDK 不一定发送此字段
4944
+ // 缺少此字段时,大多数 OpenAI 兼容端点不会在响应中附带 usage,
4945
+ // 导致 OpenCode Context 面板所有 token 显示为 0
4946
+ if (body && body.stream === true && !body.stream_options?.include_usage) {
4947
+ body = { ...body, stream_options: { ...body.stream_options, include_usage: true } };
4948
+ }
4949
+ const bodyBuffer = body !== req.body
4950
+ ? Buffer.from(JSON.stringify(body))
4951
+ : req.rawBody
4952
+ ? Buffer.isBuffer(req.rawBody) ? req.rawBody : Buffer.from(req.rawBody)
4953
+ : Buffer.from(JSON.stringify(req.body));
3914
4954
  proxyReq.setHeader('Content-Length', bodyBuffer.length);
3915
4955
  proxyReq.write(bodyBuffer);
3916
4956
  proxyReq.end();
@@ -4087,18 +5127,23 @@ async function startOpenCodeProxyServer(options = {}) {
4087
5127
  inputTokens: 0,
4088
5128
  outputTokens: 0,
4089
5129
  cachedTokens: 0,
5130
+ cacheCreationTokens: 0,
5131
+ cacheReadTokens: 0,
4090
5132
  reasoningTokens: 0,
4091
5133
  totalTokens: 0,
4092
- model: ''
5134
+ model: '',
5135
+ _parseErrorLogged: false
4093
5136
  };
4094
5137
 
4095
- proxyRes.on('data', (chunk) => {
5138
+ const decodedStream = createDecodedStream(proxyRes);
5139
+
5140
+ decodedStream.on('data', (chunk) => {
4096
5141
  // 如果响应已关闭,停止处理
4097
5142
  if (isResponseClosed) {
4098
5143
  return;
4099
5144
  }
4100
5145
 
4101
- buffer += chunk.toString();
5146
+ buffer += chunk.toString('utf8');
4102
5147
 
4103
5148
  // 检查是否是 SSE 流
4104
5149
  if (proxyRes.headers['content-type']?.includes('text/event-stream')) {
@@ -4106,7 +5151,7 @@ async function startOpenCodeProxyServer(options = {}) {
4106
5151
  const events = buffer.split('\n\n');
4107
5152
  buffer = events.pop() || '';
4108
5153
 
4109
- events.forEach((eventText, index) => {
5154
+ events.forEach((eventText) => {
4110
5155
  if (!eventText.trim()) return;
4111
5156
 
4112
5157
  try {
@@ -4127,7 +5172,6 @@ async function startOpenCodeProxyServer(options = {}) {
4127
5172
 
4128
5173
  // OpenAI Responses API: 在 response.completed 事件中获取 usage
4129
5174
  if (parsed.type === 'response.completed' && parsed.response) {
4130
- // 从 response 对象中提取模型和 usage
4131
5175
  if (parsed.response.model) {
4132
5176
  tokenData.model = parsed.response.model;
4133
5177
  }
@@ -4137,7 +5181,6 @@ async function startOpenCodeProxyServer(options = {}) {
4137
5181
  tokenData.outputTokens = parsed.response.usage.output_tokens || 0;
4138
5182
  tokenData.totalTokens = parsed.response.usage.total_tokens || 0;
4139
5183
 
4140
- // 提取详细信息
4141
5184
  if (parsed.response.usage.input_tokens_details) {
4142
5185
  tokenData.cachedTokens = parsed.response.usage.input_tokens_details.cached_tokens || 0;
4143
5186
  }
@@ -4147,24 +5190,81 @@ async function startOpenCodeProxyServer(options = {}) {
4147
5190
  }
4148
5191
  }
4149
5192
 
5193
+ // Anthropic SSE: message_start 含初始 usage 和模型
5194
+ if (parsed.type === 'message_start' && parsed.message) {
5195
+ if (parsed.message.model) {
5196
+ tokenData.model = parsed.message.model;
5197
+ }
5198
+ if (parsed.message.usage) {
5199
+ const u = parsed.message.usage;
5200
+ if (Number.isFinite(Number(u.input_tokens))) {
5201
+ tokenData.inputTokens = Number(u.input_tokens);
5202
+ }
5203
+ if (Number.isFinite(Number(u.output_tokens))) {
5204
+ tokenData.outputTokens = Number(u.output_tokens);
5205
+ }
5206
+ const cacheCreation = Number(u.cache_creation_input_tokens || 0);
5207
+ const cacheRead = Number(u.cache_read_input_tokens || 0);
5208
+ if (cacheCreation + cacheRead > 0) {
5209
+ tokenData.cacheCreationTokens = cacheCreation;
5210
+ tokenData.cacheReadTokens = cacheRead;
5211
+ tokenData.cachedTokens = cacheCreation + cacheRead;
5212
+ }
5213
+ }
5214
+ }
5215
+
5216
+ // Anthropic SSE: message_delta 含最终 output_tokens
5217
+ if (parsed.type === 'message_delta' && parsed.usage) {
5218
+ const u = parsed.usage;
5219
+ if (Number.isFinite(Number(u.output_tokens))) {
5220
+ tokenData.outputTokens = Number(u.output_tokens);
5221
+ }
5222
+ const cacheCreation = Number(u.cache_creation_input_tokens || 0);
5223
+ const cacheRead = Number(u.cache_read_input_tokens || 0);
5224
+ if (cacheCreation + cacheRead > 0) {
5225
+ tokenData.cacheCreationTokens = cacheCreation;
5226
+ tokenData.cacheReadTokens = cacheRead;
5227
+ tokenData.cachedTokens = cacheCreation + cacheRead;
5228
+ }
5229
+ }
5230
+
4150
5231
  // 兼容其他格式:直接在顶层的 model 和 usage
4151
5232
  if (parsed.model && !tokenData.model) {
4152
5233
  tokenData.model = parsed.model;
4153
5234
  }
4154
5235
 
4155
5236
  if (parsed.usage && tokenData.inputTokens === 0) {
4156
- // 兼容 Responses API 和 Chat Completions API
4157
5237
  tokenData.inputTokens = parsed.usage.input_tokens || parsed.usage.prompt_tokens || 0;
4158
5238
  tokenData.outputTokens = parsed.usage.output_tokens || parsed.usage.completion_tokens || 0;
5239
+ const cacheCreation = Number(parsed.usage.cache_creation_input_tokens || 0);
5240
+ const cacheRead = Number(parsed.usage.cache_read_input_tokens || 0);
5241
+ if (cacheCreation + cacheRead > 0) {
5242
+ tokenData.cacheCreationTokens = cacheCreation;
5243
+ tokenData.cacheReadTokens = cacheRead;
5244
+ tokenData.cachedTokens = cacheCreation + cacheRead;
5245
+ }
5246
+ }
5247
+
5248
+ // Gemini SSE: usageMetadata
5249
+ if (parsed.usageMetadata) {
5250
+ const u = parsed.usageMetadata;
5251
+ tokenData.inputTokens = Number(u.promptTokenCount || 0);
5252
+ tokenData.outputTokens = Number(u.candidatesTokenCount || 0);
5253
+ tokenData.cachedTokens = Number(u.cachedContentTokenCount || 0);
5254
+ tokenData.totalTokens = Number(u.totalTokenCount || 0);
4159
5255
  }
4160
5256
  } catch (err) {
4161
- // 忽略解析错误
5257
+ if (!tokenData._parseErrorLogged) {
5258
+ tokenData._parseErrorLogged = true;
5259
+ const snippet = typeof data === 'string' ? data.slice(0, 100) : '';
5260
+ console.warn(`[OpenCode Passthrough] SSE parse error (channel: ${metadata?.channel}): ${err.message}, data: ${snippet}`);
5261
+ }
4162
5262
  }
4163
5263
  });
4164
5264
  }
4165
5265
  });
4166
5266
 
4167
- proxyRes.on('end', () => {
5267
+ decodedStream.on('end', () => {
4168
5268
  // 如果不是流式响应,尝试从完整响应中解析
4169
5269
  if (!proxyRes.headers['content-type']?.includes('text/event-stream')) {
4170
5270
  try {
@@ -4173,12 +5273,21 @@ async function startOpenCodeProxyServer(options = {}) {
4173
5273
  tokenData.model = parsed.model;
4174
5274
  }
4175
5275
  if (parsed.usage) {
4176
- // 兼容两种格式
4177
5276
  tokenData.inputTokens = parsed.usage.input_tokens || parsed.usage.prompt_tokens || 0;
4178
5277
  tokenData.outputTokens = parsed.usage.output_tokens || parsed.usage.completion_tokens || 0;
5278
+ const cacheCreation = Number(parsed.usage.cache_creation_input_tokens || 0);
5279
+ const cacheRead = Number(parsed.usage.cache_read_input_tokens || 0);
5280
+ if (cacheCreation + cacheRead > 0) {
5281
+ tokenData.cacheCreationTokens = cacheCreation;
5282
+ tokenData.cacheReadTokens = cacheRead;
5283
+ tokenData.cachedTokens = cacheCreation + cacheRead;
5284
+ }
4179
5285
  }
4180
5286
  } catch (err) {
4181
- // 忽略解析错误
5287
+ if (!tokenData._parseErrorLogged) {
5288
+ tokenData._parseErrorLogged = true;
5289
+ console.warn(`[OpenCode Passthrough] Non-SSE response parse error (channel: ${metadata?.channel}): ${err.message}`);
5290
+ }
4182
5291
  }
4183
5292
  }
4184
5293
 
@@ -4196,7 +5305,9 @@ async function startOpenCodeProxyServer(options = {}) {
4196
5305
  const tokens = {
4197
5306
  input: tokenData.inputTokens,
4198
5307
  output: tokenData.outputTokens,
4199
- total: tokenData.inputTokens + tokenData.outputTokens
5308
+ cacheCreation: tokenData.cacheCreationTokens,
5309
+ cacheRead: tokenData.cacheReadTokens,
5310
+ total: tokenData.totalTokens || (tokenData.inputTokens + tokenData.outputTokens)
4200
5311
  };
4201
5312
  const cost = calculateCost(tokenData.model, tokens);
4202
5313
 
@@ -4247,7 +5358,7 @@ async function startOpenCodeProxyServer(options = {}) {
4247
5358
  }
4248
5359
  });
4249
5360
 
4250
- proxyRes.on('error', (err) => {
5361
+ decodedStream.on('error', (err) => {
4251
5362
  // 忽略代理响应错误(可能是网络问题)
4252
5363
  if (err.code !== 'EPIPE' && err.code !== 'ECONNRESET') {
4253
5364
  console.error('Proxy response error:', err);