evalscope 0.17.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273)
  1. evalscope/__init__.py +4 -1
  2. evalscope/api/__init__.py +0 -0
  3. evalscope/api/benchmark/__init__.py +3 -0
  4. evalscope/api/benchmark/adapters/__init__.py +3 -0
  5. evalscope/api/benchmark/adapters/default_data_adapter.py +683 -0
  6. evalscope/api/benchmark/adapters/multi_choice_adapter.py +83 -0
  7. evalscope/api/benchmark/adapters/text2image_adapter.py +155 -0
  8. evalscope/api/benchmark/benchmark.py +321 -0
  9. evalscope/api/benchmark/meta.py +115 -0
  10. evalscope/api/dataset/__init__.py +2 -0
  11. evalscope/api/dataset/dataset.py +349 -0
  12. evalscope/api/dataset/loader.py +261 -0
  13. evalscope/api/dataset/utils.py +143 -0
  14. evalscope/api/evaluator/__init__.py +3 -0
  15. evalscope/api/evaluator/cache.py +355 -0
  16. evalscope/api/evaluator/evaluator.py +56 -0
  17. evalscope/api/evaluator/state.py +264 -0
  18. evalscope/api/filter/__init__.py +1 -0
  19. evalscope/api/filter/filter.py +72 -0
  20. evalscope/api/messages/__init__.py +11 -0
  21. evalscope/api/messages/chat_message.py +198 -0
  22. evalscope/api/messages/content.py +102 -0
  23. evalscope/api/messages/utils.py +35 -0
  24. evalscope/api/metric/__init__.py +2 -0
  25. evalscope/api/metric/metric.py +55 -0
  26. evalscope/api/metric/scorer.py +105 -0
  27. evalscope/api/mixin/__init__.py +2 -0
  28. evalscope/api/mixin/dataset_mixin.py +105 -0
  29. evalscope/api/mixin/llm_judge_mixin.py +168 -0
  30. evalscope/api/model/__init__.py +12 -0
  31. evalscope/api/model/generate_config.py +157 -0
  32. evalscope/api/model/model.py +383 -0
  33. evalscope/api/model/model_output.py +285 -0
  34. evalscope/api/registry.py +182 -0
  35. evalscope/api/tool/__init__.py +3 -0
  36. evalscope/api/tool/tool_call.py +101 -0
  37. evalscope/api/tool/tool_info.py +173 -0
  38. evalscope/api/tool/utils.py +64 -0
  39. evalscope/app/ui/app_ui.py +2 -1
  40. evalscope/app/ui/multi_model.py +50 -25
  41. evalscope/app/ui/single_model.py +23 -11
  42. evalscope/app/utils/data_utils.py +42 -26
  43. evalscope/app/utils/text_utils.py +0 -2
  44. evalscope/app/utils/visualization.py +9 -4
  45. evalscope/arguments.py +6 -7
  46. evalscope/backend/opencompass/api_meta_template.py +2 -1
  47. evalscope/backend/opencompass/backend_manager.py +6 -3
  48. evalscope/backend/rag_eval/clip_benchmark/dataset_builder.py +10 -10
  49. evalscope/backend/rag_eval/clip_benchmark/task_template.py +8 -4
  50. evalscope/backend/rag_eval/ragas/task_template.py +2 -1
  51. evalscope/backend/rag_eval/ragas/tasks/build_distribution.py +2 -1
  52. evalscope/backend/rag_eval/ragas/tasks/build_transform.py +7 -4
  53. evalscope/backend/rag_eval/ragas/tasks/testset_generation.py +2 -1
  54. evalscope/backend/rag_eval/ragas/tasks/translate_prompt.py +2 -1
  55. evalscope/backend/rag_eval/utils/embedding.py +2 -1
  56. evalscope/backend/rag_eval/utils/llm.py +13 -12
  57. evalscope/benchmarks/__init__.py +0 -2
  58. evalscope/benchmarks/aigc/i2i/__init__.py +0 -0
  59. evalscope/benchmarks/aigc/i2i/general_i2i_adapter.py +44 -0
  60. evalscope/benchmarks/aigc/t2i/evalmuse_adapter.py +53 -55
  61. evalscope/benchmarks/aigc/t2i/genai_bench_adapter.py +41 -46
  62. evalscope/benchmarks/aigc/t2i/general_t2i_adapter.py +29 -45
  63. evalscope/benchmarks/aigc/t2i/hpdv2_adapter.py +34 -44
  64. evalscope/benchmarks/aigc/t2i/tifa_adapter.py +16 -27
  65. evalscope/benchmarks/aime/aime24_adapter.py +38 -40
  66. evalscope/benchmarks/aime/aime25_adapter.py +34 -40
  67. evalscope/benchmarks/alpaca_eval/alpaca_eval_adapter.py +86 -60
  68. evalscope/benchmarks/arc/arc_adapter.py +34 -147
  69. evalscope/benchmarks/arena_hard/arena_hard_adapter.py +96 -70
  70. evalscope/benchmarks/arena_hard/utils.py +37 -1
  71. evalscope/benchmarks/bbh/bbh_adapter.py +72 -144
  72. evalscope/benchmarks/bfcl/bfcl_adapter.py +181 -160
  73. evalscope/benchmarks/bfcl/generation.py +222 -0
  74. evalscope/benchmarks/ceval/ceval_adapter.py +94 -162
  75. evalscope/benchmarks/chinese_simple_qa/csimple_qa_adapter.py +85 -82
  76. evalscope/benchmarks/cmmlu/cmmlu_adapter.py +34 -125
  77. evalscope/benchmarks/competition_math/competition_math_adapter.py +56 -108
  78. evalscope/benchmarks/data_collection/data_collection_adapter.py +183 -45
  79. evalscope/benchmarks/docmath/docmath_adapter.py +109 -51
  80. evalscope/benchmarks/docmath/utils.py +4 -5
  81. evalscope/benchmarks/drop/drop_adapter.py +88 -40
  82. evalscope/benchmarks/frames/frames_adapter.py +135 -52
  83. evalscope/benchmarks/general_arena/general_arena_adapter.py +136 -98
  84. evalscope/benchmarks/general_arena/utils.py +23 -27
  85. evalscope/benchmarks/general_mcq/general_mcq_adapter.py +40 -101
  86. evalscope/benchmarks/general_qa/general_qa_adapter.py +73 -134
  87. evalscope/benchmarks/gpqa/gpqa_adapter.py +61 -100
  88. evalscope/benchmarks/gpqa/{chain_of_thought.txt → prompt.py} +12 -5
  89. evalscope/benchmarks/gsm8k/gsm8k_adapter.py +62 -142
  90. evalscope/benchmarks/hellaswag/hellaswag_adapter.py +35 -124
  91. evalscope/benchmarks/hle/hle_adapter.py +127 -93
  92. evalscope/benchmarks/humaneval/humaneval_adapter.py +86 -55
  93. evalscope/benchmarks/ifeval/ifeval_adapter.py +69 -40
  94. evalscope/benchmarks/ifeval/instructions.py +109 -64
  95. evalscope/benchmarks/ifeval/instructions_registry.py +1 -1
  96. evalscope/benchmarks/ifeval/utils.py +6 -7
  97. evalscope/benchmarks/iquiz/iquiz_adapter.py +30 -65
  98. evalscope/benchmarks/live_code_bench/evaluate_utils.py +2 -2
  99. evalscope/benchmarks/live_code_bench/live_code_bench_adapter.py +121 -71
  100. evalscope/benchmarks/live_code_bench/load_utils.py +13 -21
  101. evalscope/benchmarks/live_code_bench/testing_util.py +6 -2
  102. evalscope/benchmarks/maritime_bench/maritime_bench_adapter.py +49 -75
  103. evalscope/benchmarks/math_500/math_500_adapter.py +41 -48
  104. evalscope/benchmarks/mmlu/mmlu_adapter.py +32 -205
  105. evalscope/benchmarks/mmlu_pro/mmlu_pro_adapter.py +80 -99
  106. evalscope/benchmarks/mmlu_redux/mmlu_redux_adapter.py +64 -110
  107. evalscope/benchmarks/musr/musr_adapter.py +33 -64
  108. evalscope/benchmarks/needle_haystack/needle_haystack_adapter.py +192 -152
  109. evalscope/benchmarks/process_bench/process_bench_adapter.py +144 -76
  110. evalscope/benchmarks/race/race_adapter.py +33 -119
  111. evalscope/benchmarks/simple_qa/simple_qa_adapter.py +72 -70
  112. evalscope/benchmarks/super_gpqa/{five_shot_prompt.txt → prompt.py} +14 -16
  113. evalscope/benchmarks/super_gpqa/super_gpqa_adapter.py +73 -117
  114. evalscope/benchmarks/super_gpqa/utils.py +2 -1
  115. evalscope/benchmarks/tau_bench/generation.py +147 -0
  116. evalscope/benchmarks/tau_bench/tau_bench_adapter.py +112 -54
  117. evalscope/benchmarks/tool_bench/tool_bench_adapter.py +91 -70
  118. evalscope/benchmarks/trivia_qa/trivia_qa_adapter.py +56 -124
  119. evalscope/benchmarks/truthful_qa/truthful_qa_adapter.py +70 -265
  120. evalscope/benchmarks/winogrande/winogrande_adapter.py +28 -54
  121. evalscope/cli/cli.py +2 -0
  122. evalscope/cli/start_server.py +6 -3
  123. evalscope/collections/__init__.py +2 -10
  124. evalscope/collections/sampler.py +10 -10
  125. evalscope/collections/schema.py +13 -11
  126. evalscope/config.py +95 -54
  127. evalscope/constants.py +29 -61
  128. evalscope/evaluator/__init__.py +1 -1
  129. evalscope/evaluator/evaluator.py +277 -423
  130. evalscope/filters/__init__.py +2 -0
  131. evalscope/filters/extraction.py +126 -0
  132. evalscope/filters/selection.py +57 -0
  133. evalscope/metrics/__init__.py +13 -13
  134. evalscope/metrics/llm_judge.py +32 -30
  135. evalscope/metrics/math_parser.py +27 -22
  136. evalscope/metrics/metric.py +307 -0
  137. evalscope/metrics/metrics.py +22 -18
  138. evalscope/metrics/t2v_metrics/__init__.py +0 -52
  139. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/clip_model.py +4 -2
  140. evalscope/metrics/t2v_metrics/models/clipscore_models/build_mps_model/cross_modeling.py +9 -13
  141. evalscope/metrics/t2v_metrics/models/clipscore_models/clip_model.py +2 -1
  142. evalscope/metrics/t2v_metrics/models/clipscore_models/hpsv2_model.py +3 -2
  143. evalscope/metrics/t2v_metrics/models/clipscore_models/mps_model.py +2 -1
  144. evalscope/metrics/t2v_metrics/models/clipscore_models/pickscore_model.py +2 -2
  145. evalscope/metrics/t2v_metrics/models/itmscore_models/blip2_itm_model.py +2 -1
  146. evalscope/metrics/t2v_metrics/models/itmscore_models/fga_blip2_model.py +4 -2
  147. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/ImageReward.py +10 -5
  148. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward/blip_pretrain.py +4 -2
  149. evalscope/metrics/t2v_metrics/models/itmscore_models/image_reward_model.py +2 -1
  150. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/language_model/clip_t5.py +15 -9
  151. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5/model/multimodal_encoder/clip_encoder.py +4 -2
  152. evalscope/metrics/t2v_metrics/models/vqascore_models/clip_t5_model.py +15 -10
  153. evalscope/metrics/t2v_metrics/models/vqascore_models/gpt4v_model.py +9 -6
  154. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/config.py +2 -2
  155. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/gradcam.py +4 -2
  156. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/logger.py +4 -2
  157. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/optims.py +3 -9
  158. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/registry.py +16 -10
  159. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa.py +3 -2
  160. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/common/vqa_tools/vqa_eval.py +4 -2
  161. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/__init__.py +8 -4
  162. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/Qformer.py +47 -25
  163. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_qformer.py +12 -7
  164. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5.py +23 -17
  165. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/blip2_t5_instruct.py +33 -23
  166. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/fga_blip2.py +2 -1
  167. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_llama.py +46 -30
  168. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip2_models/modeling_t5.py +69 -37
  169. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/__init__.py +7 -5
  170. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip.py +6 -4
  171. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_image_text_matching.py +7 -5
  172. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_nlvr.py +3 -2
  173. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_outputs.py +5 -2
  174. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/blip_vqa.py +17 -13
  175. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/blip_models/nlvr_encoder.py +35 -19
  176. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/clip_vit.py +14 -12
  177. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/eva_vit.py +63 -52
  178. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/med.py +63 -38
  179. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/models/vit.py +6 -3
  180. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/__init__.py +6 -2
  181. evalscope/metrics/t2v_metrics/models/vqascore_models/lavis/processors/randaugment.py +3 -2
  182. evalscope/metrics/t2v_metrics/models/vqascore_models/mm_utils.py +15 -13
  183. evalscope/metrics/t2v_metrics/models/vqascore_models/vqa_model.py +3 -2
  184. evalscope/models/__init__.py +6 -29
  185. evalscope/models/mockllm.py +65 -0
  186. evalscope/models/model_apis.py +47 -0
  187. evalscope/models/modelscope.py +455 -0
  188. evalscope/models/openai_compatible.py +123 -0
  189. evalscope/models/text2image_model.py +124 -0
  190. evalscope/models/utils/openai.py +698 -0
  191. evalscope/perf/benchmark.py +2 -1
  192. evalscope/perf/http_client.py +4 -2
  193. evalscope/perf/plugin/api/custom_api.py +5 -4
  194. evalscope/perf/plugin/api/openai_api.py +11 -9
  195. evalscope/perf/plugin/datasets/custom.py +2 -1
  196. evalscope/perf/plugin/datasets/flickr8k.py +1 -1
  197. evalscope/perf/plugin/datasets/kontext_bench.py +1 -1
  198. evalscope/perf/plugin/datasets/line_by_line.py +2 -1
  199. evalscope/perf/plugin/datasets/longalpaca.py +2 -1
  200. evalscope/perf/plugin/datasets/openqa.py +4 -2
  201. evalscope/perf/utils/benchmark_util.py +7 -5
  202. evalscope/perf/utils/db_util.py +9 -6
  203. evalscope/perf/utils/local_server.py +8 -3
  204. evalscope/perf/utils/rich_display.py +16 -10
  205. evalscope/report/__init__.py +2 -2
  206. evalscope/report/combinator.py +18 -12
  207. evalscope/report/generator.py +101 -6
  208. evalscope/report/{utils.py → report.py} +8 -6
  209. evalscope/run.py +26 -44
  210. evalscope/summarizer.py +1 -1
  211. evalscope/utils/__init__.py +21 -2
  212. evalscope/utils/chat_service.py +2 -1
  213. evalscope/utils/deprecation_utils.py +12 -1
  214. evalscope/utils/function_utils.py +29 -0
  215. evalscope/utils/io_utils.py +100 -5
  216. evalscope/utils/json_schema.py +208 -0
  217. evalscope/utils/logger.py +51 -12
  218. evalscope/utils/model_utils.py +10 -7
  219. evalscope/utils/multi_choices.py +271 -0
  220. evalscope/utils/url_utils.py +65 -0
  221. evalscope/version.py +2 -2
  222. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/METADATA +98 -49
  223. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/RECORD +234 -216
  224. tests/aigc/test_t2i.py +22 -4
  225. tests/benchmark/__init__.py +1 -0
  226. tests/benchmark/test_eval.py +386 -0
  227. tests/cli/test_all.py +3 -5
  228. tests/cli/test_collection.py +13 -4
  229. tests/cli/test_custom.py +22 -15
  230. tests/rag/test_clip_benchmark.py +1 -0
  231. evalscope/benchmarks/aigc/t2i/base.py +0 -56
  232. evalscope/benchmarks/arc/ai2_arc.py +0 -151
  233. evalscope/benchmarks/benchmark.py +0 -81
  234. evalscope/benchmarks/ceval/ceval_exam.py +0 -146
  235. evalscope/benchmarks/cmmlu/cmmlu.py +0 -161
  236. evalscope/benchmarks/cmmlu/samples.jsonl +0 -5
  237. evalscope/benchmarks/competition_math/competition_math.py +0 -79
  238. evalscope/benchmarks/data_adapter.py +0 -528
  239. evalscope/benchmarks/filters.py +0 -59
  240. evalscope/benchmarks/gsm8k/gsm8k.py +0 -121
  241. evalscope/benchmarks/hellaswag/hellaswag.py +0 -112
  242. evalscope/benchmarks/humaneval/humaneval.py +0 -79
  243. evalscope/benchmarks/mmlu/mmlu.py +0 -160
  244. evalscope/benchmarks/mmlu/samples.jsonl +0 -5
  245. evalscope/benchmarks/process_bench/critique_template.txt +0 -13
  246. evalscope/benchmarks/race/race.py +0 -104
  247. evalscope/benchmarks/race/samples.jsonl +0 -5
  248. evalscope/benchmarks/super_gpqa/zero_shot_prompt.txt +0 -4
  249. evalscope/benchmarks/trivia_qa/trivia_qa.py +0 -89
  250. evalscope/benchmarks/truthful_qa/truthful_qa.py +0 -163
  251. evalscope/benchmarks/utils.py +0 -60
  252. evalscope/collections/evaluator.py +0 -375
  253. evalscope/metrics/completion_parsers.py +0 -227
  254. evalscope/metrics/named_metrics.py +0 -55
  255. evalscope/models/adapters/__init__.py +0 -14
  256. evalscope/models/adapters/base_adapter.py +0 -84
  257. evalscope/models/adapters/bfcl_adapter.py +0 -246
  258. evalscope/models/adapters/chat_adapter.py +0 -207
  259. evalscope/models/adapters/choice_adapter.py +0 -222
  260. evalscope/models/adapters/custom_adapter.py +0 -71
  261. evalscope/models/adapters/server_adapter.py +0 -236
  262. evalscope/models/adapters/t2i_adapter.py +0 -79
  263. evalscope/models/adapters/tau_bench_adapter.py +0 -189
  264. evalscope/models/custom/__init__.py +0 -4
  265. evalscope/models/custom/custom_model.py +0 -50
  266. evalscope/models/custom/dummy_model.py +0 -99
  267. evalscope/models/local_model.py +0 -128
  268. evalscope/models/register.py +0 -41
  269. tests/cli/test_run.py +0 -489
  270. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/LICENSE +0 -0
  271. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/WHEEL +0 -0
  272. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/entry_points.txt +0 -0
  273. {evalscope-0.17.1.dist-info → evalscope-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,698 @@
1
+ import base64
2
+ import json
3
+ import re
4
+ from collections import defaultdict
5
+ from copy import copy
6
+ from openai import APIStatusError, OpenAIError
7
+ from openai.types.chat import (
8
+ ChatCompletion,
9
+ ChatCompletionAssistantMessageParam,
10
+ ChatCompletionChunk,
11
+ ChatCompletionContentPartImageParam,
12
+ ChatCompletionContentPartInputAudioParam,
13
+ ChatCompletionContentPartParam,
14
+ ChatCompletionContentPartRefusalParam,
15
+ ChatCompletionContentPartTextParam,
16
+ ChatCompletionDeveloperMessageParam,
17
+ ChatCompletionMessage,
18
+ ChatCompletionMessageParam,
19
+ ChatCompletionMessageToolCall,
20
+ ChatCompletionMessageToolCallParam,
21
+ ChatCompletionNamedToolChoiceParam,
22
+ ChatCompletionSystemMessageParam,
23
+ ChatCompletionToolChoiceOptionParam,
24
+ ChatCompletionToolMessageParam,
25
+ ChatCompletionToolParam,
26
+ ChatCompletionUserMessageParam,
27
+ )
28
+ from openai.types.chat.chat_completion import Choice, ChoiceLogprobs
29
+ from openai.types.chat.chat_completion_message_tool_call import Function
30
+ from openai.types.completion_usage import CompletionUsage
31
+ from openai.types.shared_params.function_definition import FunctionDefinition
32
+ from pydantic import JsonValue
33
+ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
34
+
35
+ from evalscope.api.messages import (
36
+ ChatMessage,
37
+ ChatMessageAssistant,
38
+ ChatMessageSystem,
39
+ ChatMessageTool,
40
+ ChatMessageUser,
41
+ Content,
42
+ ContentAudio,
43
+ ContentImage,
44
+ ContentReasoning,
45
+ ContentText,
46
+ parse_content_with_reasoning,
47
+ )
48
+ from evalscope.api.model import (
49
+ ChatCompletionChoice,
50
+ GenerateConfig,
51
+ Logprobs,
52
+ ModelOutput,
53
+ ModelUsage,
54
+ StopReason,
55
+ as_stop_reason,
56
+ )
57
+ from evalscope.api.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo, parse_tool_call
58
+ from evalscope.utils.url_utils import file_as_data_uri, is_http_url
59
+
60
+ BASE_64_DATA_REMOVED = '<base64-data-removed>'
61
+
62
+
63
class OpenAIResponseError(OpenAIError):
    """Error raised for OpenAI responses that carry an explicit error payload.

    Keeps the provider-supplied error ``code`` alongside the human-readable
    ``message`` so callers can branch on the code while logging the message.
    """

    def __init__(self, code: str, message: str) -> None:
        self.code = code
        self.message = message

    def __str__(self) -> str:
        return '{}: {}'.format(self.code, self.message)
71
+
72
+
73
def openai_chat_tool_call(tool_call: 'ToolCall') -> 'ChatCompletionMessageToolCall':
    """Convert an internal ToolCall into the OpenAI SDK tool-call model.

    The tool arguments are serialized to a JSON string, as required by the
    chat completions API.
    """
    func = Function(
        name=tool_call.function.name,
        arguments=json.dumps(tool_call.function.arguments),
    )
    return ChatCompletionMessageToolCall(id=tool_call.id, type='function', function=func)
79
+
80
+
81
def openai_chat_tool_call_param(tool_call: 'ToolCall') -> 'ChatCompletionMessageToolCallParam':
    """Convert an internal ToolCall into the dict-style request param form."""
    serialized_args = json.dumps(tool_call.function.arguments)
    return ChatCompletionMessageToolCallParam(
        id=tool_call.id,
        type='function',
        function=dict(name=tool_call.function.name, arguments=serialized_args),
    )
87
+
88
+
89
def openai_chat_completion_part(content: 'Content') -> 'ChatCompletionContentPartParam':
    """Convert one internal content part into an OpenAI content-part param.

    Text passes through unchanged; images are sent as URLs (local files are
    inlined as data URIs); audio is inlined as base64. Any other content type
    is unsupported and raises.

    Raises:
        RuntimeError: For content types the chat API cannot accept (e.g. video).
    """
    kind = content.type
    if kind == 'text':
        return ChatCompletionContentPartTextParam(type='text', text=content.text)
    if kind == 'image':
        # The API accepts either a remote URL or a base64 data URI; anything
        # that is not already an http(s) URL gets inlined as a data URI.
        image_url = content.image
        detail = content.detail
        if not is_http_url(image_url):
            image_url = file_as_data_uri(image_url)
        return ChatCompletionContentPartImageParam(
            type='image_url',
            image_url=dict(url=image_url, detail=detail),
        )
    if kind == 'audio':
        # Strip the data-URI prefix: the API wants only the raw base64 payload.
        data_uri = file_as_data_uri(content.audio)
        payload = data_uri.split('base64,')[1]
        return ChatCompletionContentPartInputAudioParam(
            type='input_audio', input_audio=dict(data=payload, format=content.format)
        )
    raise RuntimeError('Video content is not currently supported by Open AI chat models.')
115
+
116
+
117
def openai_chat_message(
    message: ChatMessage, system_role: Literal['user', 'system', 'developer'] = 'system'
) -> ChatCompletionMessageParam:
    """Convert one internal ChatMessage into an OpenAI request message param.

    Args:
        message: The internal message to convert.
        system_role: Which OpenAI role to emit for internal 'system' messages
            (some endpoints expect 'developer' or only accept 'user').

    Returns:
        The dict-style message param for the chat completions API.

    Raises:
        ValueError: If the message role is not system/user/assistant/tool.
    """
    if message.role == 'system':
        # Remap the system message onto whichever role the target model expects.
        if system_role == 'user':
            return ChatCompletionUserMessageParam(role='user', content=message.text)
        elif system_role == 'system':
            return ChatCompletionSystemMessageParam(role=message.role, content=message.text)
        elif system_role == 'developer':
            return ChatCompletionDeveloperMessageParam(role='developer', content=message.text)
    elif message.role == 'user':
        # String content passes through; structured content is converted part by part.
        return ChatCompletionUserMessageParam(
            role=message.role,
            content=(
                message.content if isinstance(message.content, str) else
                [openai_chat_completion_part(content) for content in message.content]
            ),
        )
    elif message.role == 'assistant':
        # Assistant content (including smuggled reasoning/internal data) is
        # flattened to plain text by openai_assistant_content.
        if message.tool_calls:
            return ChatCompletionAssistantMessageParam(
                role=message.role,
                content=openai_assistant_content(message),
                tool_calls=[openai_chat_tool_call_param(call) for call in message.tool_calls],
            )
        else:
            return ChatCompletionAssistantMessageParam(role=message.role, content=openai_assistant_content(message))
    elif message.role == 'tool':
        # Tool errors are surfaced to the model as an "Error: ..." text payload.
        return ChatCompletionToolMessageParam(
            role=message.role,
            content=(f'Error: {message.error.message}' if message.error else message.text),
            tool_call_id=str(message.tool_call_id),
        )
    else:
        raise ValueError(f'Unexpected message role {message.role}')
152
+
153
+
154
def openai_chat_messages(
    messages: 'List[ChatMessage]',
    system_role: "Literal['user', 'system', 'developer']" = 'system',
) -> 'List[ChatCompletionMessageParam]':
    """Translate a list of internal chat messages into OpenAI request params."""
    converted = []
    for msg in messages:
        converted.append(openai_chat_message(msg, system_role))
    return converted
159
+
160
+
161
def openai_completion_params(model: str, config: 'GenerateConfig', tools: bool) -> 'Dict[str, Any]':
    """Build the kwargs dict for an OpenAI chat-completions request.

    Only options explicitly set on ``config`` are forwarded; unset (None)
    options are omitted so the API defaults apply.

    Args:
        model: Target model name.
        config: Generation configuration to translate.
        tools: Whether the request will include tool definitions (gates
            the ``parallel_tool_calls`` option, which is only valid then).

    Returns:
        Keyword arguments for the chat completions API call.
    """
    params: Dict[str, Any] = {'model': model}

    def put(key: str, value: Any) -> None:
        # Forward an option only when it was explicitly configured.
        if value is not None:
            params[key] = value

    # Streaming requests need a companion option so usage stats are reported.
    if config.stream is not None:
        params['stream'] = config.stream
        if config.stream:
            params['stream_options'] = {'include_usage': True}

    put('timeout', config.timeout)
    put('max_tokens', config.max_tokens)
    put('frequency_penalty', config.frequency_penalty)
    put('stop', config.stop_seqs)
    put('presence_penalty', config.presence_penalty)
    put('logit_bias', config.logit_bias)
    put('seed', config.seed)
    put('temperature', config.temperature)
    put('top_p', config.top_p)
    put('n', config.n)
    put('logprobs', config.logprobs)
    put('top_logprobs', config.top_logprobs)
    if tools:
        put('parallel_tool_calls', config.parallel_tool_calls)
    put('reasoning_effort', config.reasoning_effort)

    # Structured output: wrap the schema in the json_schema response_format
    # envelope expected by the API.
    if config.response_schema is not None:
        params['response_format'] = dict(
            type='json_schema',
            json_schema=dict(
                name=config.response_schema.name,
                schema=config.response_schema.json_schema.model_dump(exclude_none=True),
                description=config.response_schema.description,
                strict=config.response_schema.strict,
            ),
        )
    # extra_body is forwarded only when non-empty (truthiness, not None-check).
    if config.extra_body:
        params['extra_body'] = config.extra_body

    return params
210
+
211
+
212
def openai_assistant_content(message: 'ChatMessageAssistant') -> str:
    """Flatten an assistant message into a single plain-text content string.

    The OpenAI choices API only accepts plain text here, so reasoning parts
    are smuggled inside ``<think>`` tags and any ``.internal`` payload is
    appended as base64-encoded JSON inside an ``<internal>`` tag. This lets
    the data survive multi-turn round trips in agent bridge scenarios.
    """
    if isinstance(message.content, str):
        text = message.content
    else:
        pieces = []
        for part in message.content:
            if part.type == 'reasoning':
                attrs = ''
                if part.signature is not None:
                    attrs += f' signature="{part.signature}"'
                if part.redacted:
                    attrs += ' redacted="true"'
                pieces.append(f'\n<think{attrs}>\n{part.reasoning}\n</think>\n')
            elif part.type == 'text':
                pieces.append(f'\n{part.text}')
        text = ''.join(pieces)

    if message.internal:
        encoded = base64.b64encode(json.dumps(message.internal).encode('utf-8')).decode('utf-8')
        text = f'{text}\n<internal>{encoded}</internal>\n'
    return text
240
+
241
+
242
def openai_chat_choices(choices: 'List[ChatCompletionChoice]') -> 'List[Choice]':
    """Convert internal chat-completion choices into OpenAI SDK Choice objects."""
    converted = []
    for idx, item in enumerate(choices):
        calls = item.message.tool_calls
        oai_calls = [openai_chat_tool_call(tc) for tc in calls] if calls else None
        oai_message = ChatCompletionMessage(
            role='assistant',
            content=openai_assistant_content(item.message),
            tool_calls=oai_calls,
        )
        oai_logprobs = ChoiceLogprobs(**item.logprobs.model_dump()) if item.logprobs is not None else None
        converted.append(
            Choice(
                finish_reason=openai_finish_reason(item.stop_reason),
                index=idx,
                message=oai_message,
                logprobs=oai_logprobs,
            )
        )
    return converted
262
+
263
+
264
def openai_completion_usage(usage: 'ModelUsage') -> 'CompletionUsage':
    """Translate internal ModelUsage token counts into OpenAI CompletionUsage."""
    return CompletionUsage(
        prompt_tokens=usage.input_tokens,
        completion_tokens=usage.output_tokens,
        total_tokens=usage.total_tokens,
    )
270
+
271
+
272
def openai_finish_reason(
    stop_reason: 'StopReason'
) -> "Literal['stop', 'length', 'tool_calls', 'content_filter', 'function_call']":
    """Map an internal StopReason onto an OpenAI ``finish_reason`` value.

    Values with a direct OpenAI equivalent pass through; 'model_length'
    becomes 'length'; anything unrecognized is reported as a normal 'stop'.
    """
    if stop_reason == 'model_length':
        return 'length'
    if stop_reason in ('stop', 'tool_calls', 'content_filter'):
        return stop_reason
    return 'stop'
281
+
282
+
283
def openai_chat_tool_param(tool: 'ToolInfo') -> 'ChatCompletionToolParam':
    """Convert an internal ToolInfo into an OpenAI tool definition param."""
    definition = FunctionDefinition(
        name=tool.name,
        description=tool.description,
        parameters=tool.parameters.model_dump(exclude_none=True),
    )
    return ChatCompletionToolParam(type='function', function=definition)
290
+
291
+
292
def openai_chat_tools(tools: 'List[ToolInfo]') -> 'List[ChatCompletionToolParam]':
    """Convert every ToolInfo into its OpenAI request param form."""
    converted = []
    for tool in tools:
        converted.append(openai_chat_tool_param(tool))
    return converted
294
+
295
+
296
def openai_chat_tool_choice(tool_choice: 'ToolChoice') -> 'ChatCompletionToolChoiceOptionParam':
    """Map an internal tool-choice setting onto the OpenAI request form."""
    # A specific function was requested by name.
    if isinstance(tool_choice, ToolFunction):
        return ChatCompletionNamedToolChoiceParam(type='function', function=dict(name=tool_choice.name))
    # openai supports 'any' via the 'required' keyword
    if tool_choice == 'any':
        return 'required'
    # 'auto' / 'none' pass through unchanged.
    return tool_choice
304
+
305
+
306
def chat_tool_calls_from_openai(
    message: 'ChatCompletionMessage', tools: 'List[ToolInfo]'
) -> 'Optional[List[ToolCall]]':
    """Parse the tool calls on an OpenAI message into internal ToolCall objects.

    Returns None when the message carries no tool calls.
    """
    calls = message.tool_calls
    if not calls:
        return None
    return [parse_tool_call(call.id, call.function.name, call.function.arguments, tools) for call in calls]
313
+
314
+
315
def chat_messages_from_openai(
    model: str,
    messages: List[ChatCompletionMessageParam],
) -> List[ChatMessage]:
    """Convert OpenAI request message params back into internal ChatMessages.

    Args:
        model: Model name recorded on reconstructed assistant messages.
        messages: Dict-style OpenAI chat messages (system/developer, user,
            assistant and tool roles are supported).

    Returns:
        The equivalent list of internal ChatMessage objects.

    Raises:
        ValueError: If a message has an unsupported role.
    """
    # track tool names by id
    # (tool result messages only carry a tool_call_id; the function name is
    # remembered here from the assistant message that issued the call)
    tool_names: Dict[str, str] = {}

    chat_messages: List[ChatMessage] = []

    for message in messages:
        content: Union[str, List[Content]] = []
        if message['role'] == 'system' or message['role'] == 'developer':
            # developer messages are folded into the internal 'system' role
            sys_content = message['content']
            if isinstance(sys_content, str):
                chat_messages.append(ChatMessageSystem(content=sys_content))
            else:
                content = []
                for sc in sys_content:
                    content.extend(content_from_openai(sc))
                chat_messages.append(ChatMessageSystem(content=content))
        elif message['role'] == 'user':
            user_content = message['content']
            if isinstance(user_content, str):
                chat_messages.append(ChatMessageUser(content=user_content))
            else:
                content = []
                for uc in user_content:
                    content.extend(content_from_openai(uc))
                chat_messages.append(ChatMessageUser(content=content))
        elif message['role'] == 'assistant':
            # resolve content
            refusal: Optional[Literal[True]] = None
            internal: Optional[JsonValue] = None
            asst_content = message.get('content', None)
            if isinstance(asst_content, str):
                # Even though the choices API doesn't take advantage of .internal,
                # we could be transforming from OpenAI choices to Inspect for agent
                # bridge scenarios where a different model (that does use .internal)
                # is the actual model being used.
                asst_content, internal = _parse_content_with_internal(asst_content)
                asst_content, smuggled_reasoning = parse_content_with_reasoning(asst_content)
                if smuggled_reasoning:
                    content = [
                        smuggled_reasoning,
                        ContentText(text=asst_content),
                    ]
                else:
                    content = asst_content
            elif asst_content is None:
                # no content: fall back to the refusal text, if any
                content = message.get('refusal', None) or ''
                if content:
                    refusal = True
            else:
                content = []
                for ac in asst_content:
                    content.extend(content_from_openai(ac, parse_reasoning=True))

            # resolve reasoning (OpenAI doesn't suport this however OpenAI-compatible
            # interfaces e.g. DeepSeek do include this field so we pluck it out)
            reasoning = message.get('reasoning_content', None) or message.get('reasoning', None)
            if reasoning is not None:
                # normalize content to an array
                if isinstance(content, str):
                    content = [ContentText(text=content, refusal=refusal)]

                # insert reasoning
                content.insert(0, ContentReasoning(reasoning=str(reasoning)))

            # return message
            if 'tool_calls' in message:
                tool_calls: List[ToolCall] = []
                for call in message['tool_calls']:
                    tool_calls.append(tool_call_from_openai(call))
                    tool_names[call['id']] = call['function']['name']

            else:
                tool_calls = []

            chat_messages.append(
                ChatMessageAssistant(
                    content=content,
                    tool_calls=tool_calls or None,
                    model=model,
                    source='generate',
                    internal=internal,
                )
            )
        elif message['role'] == 'tool':
            tool_content = message.get('content', None) or ''
            if isinstance(tool_content, str):
                # If tool_content is a simple str, it could be the result of some
                # sub-agent tool call that has <think> or <internal> smuggled inside
                # of it to support agent bridge scenarios. We have to strip that
                # data. To be clear, if it's <think>, we'll strip the <think> tag,
                # but the reasoning summary itself will remain in the content.
                content, _ = _parse_content_with_internal(tool_content)
                content, _ = parse_content_with_reasoning(content)
            else:
                content = []
                for tc in tool_content:
                    content.extend(content_from_openai(tc))
            chat_messages.append(
                ChatMessageTool(
                    content=content,
                    tool_call_id=message['tool_call_id'],
                    # name recorded earlier from the issuing assistant message;
                    # empty string when the call id was never seen
                    function=tool_names.get(message['tool_call_id'], ''),
                )
            )
        else:
            raise ValueError(f'Unexpected message param type: {type(message)}')

    return chat_messages
427
+
428
+
429
def tool_call_from_openai(tool_call: ChatCompletionMessageToolCallParam) -> ToolCall:
    """Convert an OpenAI tool-call param into a ToolCall via `parse_tool_call`."""
    function = tool_call['function']
    return parse_tool_call(tool_call['id'], function['name'], function['arguments'])
435
+
436
+
437
def content_from_openai(
    content: Union[ChatCompletionContentPartParam, ChatCompletionContentPartRefusalParam],
    parse_reasoning: bool = False,
) -> List[Content]:
    """Convert a single OpenAI content part into a list of Content items.

    Args:
        content: The OpenAI content part (text, reasoning, image, audio, refusal).
        parse_reasoning: When True, extract smuggled reasoning from text parts.

    Returns:
        One or two Content items representing the part.

    Raises:
        ValueError: If the content part has an unrecognized type.
    """
    # Some providers omit the type tag and use "object-with-a-single-field" encoding;
    # in that case the lone key doubles as the type tag.
    if 'type' not in content and len(content) == 1:
        content['type'] = next(iter(content.keys()))  # type: ignore[arg-type]

    ctype = content['type']
    if ctype == 'text':
        text = content['text']
        if parse_reasoning:
            content_text, content_reasoning = parse_content_with_reasoning(text)
            if content_reasoning:
                return [content_reasoning, ContentText(text=content_text)]
        return [ContentText(text=text)]
    if ctype == 'reasoning':  # type: ignore[comparison-overlap]
        return [ContentReasoning(reasoning=content['reasoning'])]
    if ctype == 'image_url':
        image_url = content['image_url']
        return [ContentImage(image=image_url['url'], detail=image_url['detail'])]
    if ctype == 'input_audio':
        input_audio = content['input_audio']
        return [ContentAudio(audio=input_audio['data'], format=input_audio['format'])]
    if ctype == 'refusal':
        return [ContentText(text=content['refusal'], refusal=True)]
    raise ValueError(f"Unexpected content type '{ctype}' in message.")
471
+
472
+
473
def chat_message_assistant_from_openai(
    model: str, message: ChatCompletionMessage, tools: List[ToolInfo]
) -> ChatMessageAssistant:
    """Build a ChatMessageAssistant from an OpenAI completion message.

    Args:
        model: Name of the model that produced the message.
        message: The OpenAI assistant message.
        tools: Tool definitions used to resolve tool calls.

    Returns:
        The equivalent ChatMessageAssistant.
    """
    refusal = getattr(message, 'refusal', None)
    # OpenAI itself has no reasoning field, but OpenAI-compatible providers
    # (e.g. DeepSeek) surface it under one of these two attribute names.
    reasoning = getattr(message, 'reasoning_content', None) or getattr(message, 'reasoning', None)

    msg_content = refusal or message.content or ''
    content: Union[str, List[Content]]
    if reasoning is not None:
        # reasoning always forces structured content, with the reasoning first
        content = [
            ContentReasoning(reasoning=str(reasoning)),
            ContentText(text=msg_content, refusal=True if refusal else None),
        ]
    elif refusal is not None:
        content = [ContentText(text=msg_content, refusal=True)]
    else:
        content = msg_content

    return ChatMessageAssistant(
        content=content,
        model=model,
        source='generate',
        tool_calls=chat_tool_calls_from_openai(message, tools),
    )
496
+
497
+
498
def model_output_from_openai(
    completion: ChatCompletion,
    choices: List[ChatCompletionChoice],
) -> ModelOutput:
    """Build a ModelOutput from an OpenAI ChatCompletion and converted choices.

    Args:
        completion: The raw OpenAI completion (source of model name and usage).
        choices: Already-converted choices to attach to the output.

    Returns:
        ModelOutput with usage populated when the completion reports it.
    """
    # usage is optional on the completion; translate it only when present
    usage: Optional[ModelUsage] = None
    if completion.usage:
        prompt_details = completion.usage.prompt_tokens_details
        completion_details = completion.usage.completion_tokens_details
        usage = ModelUsage(
            input_tokens=completion.usage.prompt_tokens,
            output_tokens=completion.usage.completion_tokens,
            # openai only has cache *read* stats/pricing (no cache-write stat)
            input_tokens_cache_read=(prompt_details.cached_tokens if prompt_details is not None else None),
            reasoning_tokens=(
                completion_details.reasoning_tokens if completion_details is not None else None
            ),
            total_tokens=completion.usage.total_tokens,
        )

    return ModelOutput(
        model=completion.model,
        choices=choices,
        usage=usage,
    )
521
+
522
+
523
def chat_choices_from_openai(response: ChatCompletion, tools: List[ToolInfo]) -> List[ChatCompletionChoice]:
    """Convert the choices of an OpenAI ChatCompletion, ordered by index."""
    converted: List[ChatCompletionChoice] = []
    for choice in sorted(response.choices, key=lambda c: c.index):
        # only surface logprobs when token-level content is actually present
        if choice.logprobs and choice.logprobs.content is not None:
            logprobs = Logprobs(**choice.logprobs.model_dump())
        else:
            logprobs = None
        converted.append(
            ChatCompletionChoice(
                message=chat_message_assistant_from_openai(response.model, choice.message, tools),
                stop_reason=as_stop_reason(choice.finish_reason),
                logprobs=logprobs,
            )
        )
    return converted
536
+
537
+
538
def openai_handle_bad_request(model_name: str, e: APIStatusError) -> Union[ModelOutput, Exception]:
    """Map a bad-request error to a ModelOutput when it represents a known stop condition.

    Args:
        model_name: Model to attribute the synthesized output to.
        e: The API error returned by the provider.

    Returns:
        A ModelOutput carrying the error message when the error code maps to a
        stop reason.

    Raises:
        APIStatusError: Re-raises `e` when the code is not a known stop condition.
    """
    # prefer the structured message from the error body when available
    body = e.body
    if isinstance(body, dict) and 'message' in body.keys():
        content = str(body.get('message'))
    else:
        content = e.message

    # narrow the error code to a stop_reason
    stop_reason: Optional[StopReason] = None
    if e.code == 'context_length_exceeded':
        stop_reason = 'model_length'
    elif e.code in (
        'invalid_prompt',  # seems to happen for o1/o3
        'content_policy_violation',  # seems to happen for vision
        'content_filter',  # seems to happen on azure
    ):
        stop_reason = 'content_filter'

    if not stop_reason:
        raise e
    return ModelOutput.from_content(model=model_name, content=content, stop_reason=stop_reason)
560
+
561
+
562
def openai_media_filter(key: Optional[JsonValue], value: JsonValue) -> JsonValue:
    """Redact base64 media payloads from a logged raw api call value."""
    # images embedded in raw api output
    if key == 'output' and isinstance(value, dict) and 'image_url' in value:
        redacted = copy(value)
        redacted.update(image_url=BASE_64_DATA_REMOVED)
        return redacted
    # image urls: only redact inline data: urls, leave http(s) urls intact
    if key == 'image_url' and isinstance(value, dict) and 'url' in value:
        if str(value.get('url')).startswith('data:'):
            redacted = copy(value)
            redacted.update(url=BASE_64_DATA_REMOVED)
            return redacted
        return value
    # inline audio payloads
    if key == 'input_audio' and isinstance(value, dict) and 'data' in value:
        redacted = copy(value)
        redacted.update(data=BASE_64_DATA_REMOVED)
        return redacted
    return value
576
+
577
+
578
def _parse_content_with_internal(content: str) -> Tuple[str, Optional[JsonValue]]:
    """
    Extracts and removes a smuggled <internal>...</internal> tag from the content string, if present.

    Note:
        This OpenAI model does not natively use `.internal`. However, in bridge
        scenarios—where output from a model that does use `.internal` is routed
        through this code—such a tag may be present and should be handled.

    Args:
        content: The input string, possibly containing an <internal> tag with
            base64-encoded JSON.

    Returns:
        tuple[str, JsonValue | None]:
            - The content string with the <internal>...</internal> tag removed (if present), otherwise the original string.
            - The decoded and parsed internal value (if present), otherwise None.

    Raises:
        json.JSONDecodeError: If the content of the <internal> tag is not valid JSON after decoding.
        UnicodeDecodeError: If the content of the <internal> tag is not valid UTF-8 after base64 decoding.
    """  # noqa: E501
    internal_pattern = r'<internal>(.*?)</internal>'
    # DOTALL so the base64 payload may span multiple lines
    internal_match = re.search(internal_pattern, content, re.DOTALL)
    if internal_match is None:
        return content, None

    stripped = re.sub(internal_pattern, '', content, flags=re.DOTALL).strip()
    internal = json.loads(base64.b64decode(internal_match.group(1)).decode('utf-8'))
    return stripped, internal
607
+
608
+
609
def collect_stream_response(response_stream: List[ChatCompletionChunk]) -> ChatCompletion:
    """Fold a list of streaming chunks into a single ChatCompletion.

    Accumulates per-choice text content, reasoning content, and incremental
    tool-call fragments across all chunks, then reconstructs one complete
    assistant message per choice index.

    Args:
        response_stream: All chunks received from a streamed chat completion.
            Must be non-empty (id/created/model are read from the first chunk).

    Returns:
        A ChatCompletion equivalent to the non-streamed response.
    """
    collected_chunks: List[ChatCompletionChunk] = []
    # choice index -> list of content fragments
    collected_messages = defaultdict(list)
    # choice index -> list of reasoning fragments
    collected_reasoning = defaultdict(list)
    # choice index -> {tool_call index -> partially assembled tool call dict}
    collected_tool_calls = defaultdict(dict)

    for chunk in response_stream:
        collected_chunks.append(chunk)
        for choice in chunk.choices:
            # Handle reasoning content (non-standard field; present on some
            # OpenAI-compatible providers, hence the hasattr guard)
            if hasattr(choice.delta, 'reasoning_content') and choice.delta.reasoning_content is not None:
                collected_reasoning[choice.index].append(choice.delta.reasoning_content)

            # Handle regular content
            if choice.delta.content is not None:
                collected_messages[choice.index].append(choice.delta.content)

            # Handle tool calls (streamed as fragments keyed by tool_call.index)
            if hasattr(choice.delta, 'tool_calls') and choice.delta.tool_calls:
                for tool_call in choice.delta.tool_calls:
                    tool_id = tool_call.index

                    # Initialize tool call if not present
                    if tool_id not in collected_tool_calls[choice.index]:
                        collected_tool_calls[choice.index][tool_id] = {
                            'id': tool_call.id if hasattr(tool_call, 'id') and tool_call.id else None,
                            'type': tool_call.type if hasattr(tool_call, 'type') and tool_call.type else None,
                            'function': {
                                'name': '',
                                'arguments': ''
                            }
                        }

                    # Update tool call with new chunks: name is overwritten
                    # (arrives whole), arguments are appended (arrive in pieces)
                    if hasattr(tool_call, 'function'):
                        if hasattr(tool_call.function, 'name') and tool_call.function.name:
                            collected_tool_calls[choice.index][tool_id]['function']['name'] = tool_call.function.name

                        if hasattr(tool_call.function, 'arguments') and tool_call.function.arguments:
                            collected_tool_calls[choice.index
                                                 ][tool_id]['function']['arguments'] += tool_call.function.arguments

                    # Update ID if it was received later
                    if hasattr(tool_call, 'id') and tool_call.id:
                        collected_tool_calls[choice.index][tool_id]['id'] = tool_call.id

    # Get all unique choice indices from all collections
    all_indices = set(collected_messages.keys()) | set(collected_reasoning.keys()) | set(collected_tool_calls.keys())

    choices = []
    for index in all_indices:
        full_reply_content = ''.join(collected_messages.get(index, []))
        reasoning = ''.join(collected_reasoning.get(index, []))

        # Process tool_calls for this choice if any exists
        tool_calls_list = None
        if index in collected_tool_calls and collected_tool_calls[index]:
            tool_calls_list = list(collected_tool_calls[index].values())
            # Filter out any tool calls with None id (incomplete tool calls)
            tool_calls_list = [tc for tc in tool_calls_list if tc['id'] is not None]

        # use the finish_reason from the last chunk that generated this choice
        # NOTE(review): only chunk.choices[0] is inspected here — presumably each
        # chunk carries a single choice; confirm for multi-choice (n > 1) streams
        finish_reason = None
        for chunk in reversed(collected_chunks):
            if chunk.choices and chunk.choices[0].index == index:
                finish_reason = chunk.choices[0].finish_reason
                break

        message_kwargs = {'role': 'assistant', 'content': full_reply_content}

        # only attach optional fields when non-empty so the reconstructed
        # message matches what a non-streamed response would contain
        if reasoning:
            message_kwargs['reasoning_content'] = reasoning

        if tool_calls_list:
            message_kwargs['tool_calls'] = tool_calls_list

        choice = Choice(
            finish_reason=finish_reason or 'stop', index=index, message=ChatCompletionMessage(**message_kwargs)
        )
        choices.append(choice)

    # build the final completion object (metadata from the first chunk)
    return ChatCompletion(
        id=collected_chunks[0].id,
        choices=choices,
        created=collected_chunks[0].created,
        model=collected_chunks[0].model,
        object='chat.completion',
        usage=collected_chunks[-1].usage  # use the usage from the last chunk
    )