dashscope 1.8.0__py3-none-any.whl → 1.25.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dashscope/__init__.py +61 -14
- dashscope/aigc/__init__.py +10 -3
- dashscope/aigc/chat_completion.py +282 -0
- dashscope/aigc/code_generation.py +145 -0
- dashscope/aigc/conversation.py +71 -12
- dashscope/aigc/generation.py +288 -16
- dashscope/aigc/image_synthesis.py +473 -31
- dashscope/aigc/multimodal_conversation.py +299 -14
- dashscope/aigc/video_synthesis.py +610 -0
- dashscope/api_entities/aiohttp_request.py +8 -5
- dashscope/api_entities/api_request_data.py +4 -2
- dashscope/api_entities/api_request_factory.py +68 -20
- dashscope/api_entities/base_request.py +20 -3
- dashscope/api_entities/chat_completion_types.py +344 -0
- dashscope/api_entities/dashscope_response.py +243 -15
- dashscope/api_entities/encryption.py +179 -0
- dashscope/api_entities/http_request.py +216 -62
- dashscope/api_entities/websocket_request.py +43 -34
- dashscope/app/__init__.py +5 -0
- dashscope/app/application.py +203 -0
- dashscope/app/application_response.py +246 -0
- dashscope/assistants/__init__.py +16 -0
- dashscope/assistants/assistant_types.py +175 -0
- dashscope/assistants/assistants.py +311 -0
- dashscope/assistants/files.py +197 -0
- dashscope/audio/__init__.py +4 -2
- dashscope/audio/asr/__init__.py +17 -1
- dashscope/audio/asr/asr_phrase_manager.py +203 -0
- dashscope/audio/asr/recognition.py +167 -27
- dashscope/audio/asr/transcription.py +107 -14
- dashscope/audio/asr/translation_recognizer.py +1006 -0
- dashscope/audio/asr/vocabulary.py +177 -0
- dashscope/audio/qwen_asr/__init__.py +7 -0
- dashscope/audio/qwen_asr/qwen_transcription.py +189 -0
- dashscope/audio/qwen_omni/__init__.py +11 -0
- dashscope/audio/qwen_omni/omni_realtime.py +524 -0
- dashscope/audio/qwen_tts/__init__.py +5 -0
- dashscope/audio/qwen_tts/speech_synthesizer.py +77 -0
- dashscope/audio/qwen_tts_realtime/__init__.py +10 -0
- dashscope/audio/qwen_tts_realtime/qwen_tts_realtime.py +355 -0
- dashscope/audio/tts/__init__.py +2 -0
- dashscope/audio/tts/speech_synthesizer.py +5 -0
- dashscope/audio/tts_v2/__init__.py +12 -0
- dashscope/audio/tts_v2/enrollment.py +179 -0
- dashscope/audio/tts_v2/speech_synthesizer.py +886 -0
- dashscope/cli.py +157 -37
- dashscope/client/base_api.py +652 -87
- dashscope/common/api_key.py +2 -0
- dashscope/common/base_type.py +135 -0
- dashscope/common/constants.py +13 -16
- dashscope/common/env.py +2 -0
- dashscope/common/error.py +58 -22
- dashscope/common/logging.py +2 -0
- dashscope/common/message_manager.py +2 -0
- dashscope/common/utils.py +276 -46
- dashscope/customize/__init__.py +0 -0
- dashscope/customize/customize_types.py +192 -0
- dashscope/customize/deployments.py +146 -0
- dashscope/customize/finetunes.py +234 -0
- dashscope/embeddings/__init__.py +5 -1
- dashscope/embeddings/batch_text_embedding.py +208 -0
- dashscope/embeddings/batch_text_embedding_response.py +65 -0
- dashscope/embeddings/multimodal_embedding.py +118 -10
- dashscope/embeddings/text_embedding.py +13 -1
- dashscope/{file.py → files.py} +19 -4
- dashscope/io/input_output.py +2 -0
- dashscope/model.py +11 -2
- dashscope/models.py +43 -0
- dashscope/multimodal/__init__.py +20 -0
- dashscope/multimodal/dialog_state.py +56 -0
- dashscope/multimodal/multimodal_constants.py +28 -0
- dashscope/multimodal/multimodal_dialog.py +648 -0
- dashscope/multimodal/multimodal_request_params.py +313 -0
- dashscope/multimodal/tingwu/__init__.py +10 -0
- dashscope/multimodal/tingwu/tingwu.py +80 -0
- dashscope/multimodal/tingwu/tingwu_realtime.py +579 -0
- dashscope/nlp/__init__.py +0 -0
- dashscope/nlp/understanding.py +64 -0
- dashscope/protocol/websocket.py +3 -0
- dashscope/rerank/__init__.py +0 -0
- dashscope/rerank/text_rerank.py +69 -0
- dashscope/resources/qwen.tiktoken +151643 -0
- dashscope/threads/__init__.py +26 -0
- dashscope/threads/messages/__init__.py +0 -0
- dashscope/threads/messages/files.py +113 -0
- dashscope/threads/messages/messages.py +220 -0
- dashscope/threads/runs/__init__.py +0 -0
- dashscope/threads/runs/runs.py +501 -0
- dashscope/threads/runs/steps.py +112 -0
- dashscope/threads/thread_types.py +665 -0
- dashscope/threads/threads.py +212 -0
- dashscope/tokenizers/__init__.py +7 -0
- dashscope/tokenizers/qwen_tokenizer.py +111 -0
- dashscope/tokenizers/tokenization.py +125 -0
- dashscope/tokenizers/tokenizer.py +45 -0
- dashscope/tokenizers/tokenizer_base.py +32 -0
- dashscope/utils/__init__.py +0 -0
- dashscope/utils/message_utils.py +838 -0
- dashscope/utils/oss_utils.py +243 -0
- dashscope/utils/param_utils.py +29 -0
- dashscope/version.py +3 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/METADATA +53 -50
- dashscope-1.25.6.dist-info/RECORD +112 -0
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/WHEEL +1 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/entry_points.txt +0 -1
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info/licenses}/LICENSE +2 -4
- dashscope/deployment.py +0 -129
- dashscope/finetune.py +0 -149
- dashscope-1.8.0.dist-info/RECORD +0 -49
- {dashscope-1.8.0.dist-info → dashscope-1.25.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,838 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
import copy
|
|
3
|
+
|
|
4
|
+
def merge_single_response(parsed_response, accumulated_data, n=1):
    """Merge one streaming response chunk into the running accumulated state.

    Mutates ``parsed_response`` in place so that each yielded chunk carries the
    *full* accumulated content (text, reasoning_content, tool_calls, logprobs)
    seen so far, and tracks per-choice progress in ``accumulated_data``.

    NOTE(review): ``parsed_response.output.choices[i]`` and ``choice.message``
    appear to be dict-backed objects that support BOTH attribute access
    (``choice.message.content``) and membership tests (``'index' in choice``)
    — presumably the SDK's AttrDict-style response types; confirm against
    ``dashscope.api_entities.dashscope_response``.

    Args:
        parsed_response: The response chunk to merge. Modified in place.
        accumulated_data: Dict keyed by choice index holding per-choice
            accumulation state; also holds the special ``'usage_by_index'``
            key when ``n > 1``. Caller owns this dict across chunks.
        n: Number of expected choices (default 1).

    Returns:
        bool or list: True if this response should be yielded normally,
            False if filtered, or a list of responses for n>1 with
            non-stop finish reasons (each response carries one choice).
    """
    # Check if all choices have been sent (for n > 1 case).
    # Once every tracked choice is marked sent, later packets are dropped.
    if n > 1 and accumulated_data:
        all_sent = all(data.get('all_choices_sent', False)
                       for data in accumulated_data.values()
                       if isinstance(data, dict) and 'all_choices_sent' in data)
        if all_sent:
            return False

    # Track usage for each choice index when n > 1.
    # Each streaming packet contains usage info for one specific choice.
    if (n > 1 and parsed_response.usage and
            parsed_response.output and parsed_response.output.choices and
            len(parsed_response.output.choices) > 0):
        if 'usage_by_index' not in accumulated_data:
            accumulated_data['usage_by_index'] = {}

        # Get the choice index from the first (and typically only) choice
        # in this packet.
        try:
            first_choice = parsed_response.output.choices[0]
            choice_idx = first_choice.index if hasattr(
                first_choice, 'index') and 'index' in first_choice else 0

            # Store the whole usage dict for this choice index; only packets
            # that actually report 'output_tokens' are recorded.
            if 'output_tokens' in parsed_response.usage:
                accumulated_data['usage_by_index'][choice_idx] = dict(
                    parsed_response.usage)
        except (KeyError, AttributeError, IndexError):
            # Malformed usage/choice payload — best-effort tracking only.
            pass

    # Handle output.text accumulation when choices is null
    # (plain-text streaming mode rather than message/choices mode).
    if (parsed_response.output and
            hasattr(parsed_response.output, 'text') and
            (not parsed_response.output.choices or parsed_response.output.choices is None)):
        choice_idx = 0
        if choice_idx not in accumulated_data:
            # Fresh accumulator record for this choice index.
            accumulated_data[choice_idx] = {
                'content': '',
                'reasoning_content': '',
                'tool_calls': [],
                'logprobs': {'content': []},
                'finished': False,
                'finish_reason': None,
                'all_choices_sent': False,
                'role': None
            }
        # Accumulate text if not empty
        if parsed_response.output.text:
            accumulated_data[choice_idx]['content'] += parsed_response.output.text
        # Always set accumulated content back to response
        parsed_response.output.text = accumulated_data[choice_idx]['content']
        return True

    # Process each choice in the choices array
    if parsed_response.output and parsed_response.output.choices:
        choices = parsed_response.output.choices

        # Filter out empty choices array
        if not choices:
            return False

        for choice_enum_idx, choice in enumerate(choices):
            # Use choice.index if available, otherwise use enumerate index
            try:
                choice_idx = choice.index if hasattr(choice, 'index') and 'index' in choice else choice_enum_idx
            except (KeyError, AttributeError):
                choice_idx = choice_enum_idx

            # Initialize accumulated data for this choice if not exists
            if choice_idx not in accumulated_data:
                accumulated_data[choice_idx] = {
                    'content': '',
                    'reasoning_content': '',
                    'tool_calls': [],
                    'logprobs': {'content': []},
                    'finished': False,
                    'finish_reason': None,
                    'all_choices_sent': False,
                    'role': None
                }

            # Handle message field - create if null
            if not choice.message:
                # Create message object with accumulated data so the caller
                # always sees a complete message even on sparse packets.
                choice.message = {
                    'role': accumulated_data[choice_idx]['role'] if accumulated_data[choice_idx]['role'] else 'assistant',
                    'content': accumulated_data[choice_idx]['content']
                }
                if accumulated_data[choice_idx]['reasoning_content']:
                    choice.message['reasoning_content'] = accumulated_data[choice_idx]['reasoning_content']
                if accumulated_data[choice_idx]['tool_calls']:
                    choice.message['tool_calls'] = accumulated_data[choice_idx]['tool_calls']
            else:
                # Save role if present
                if hasattr(choice.message, 'role') and choice.message.role:
                    accumulated_data[choice_idx]['role'] = choice.message.role

                # Handle content accumulation
                if 'content' in choice.message:
                    current_content = choice.message.content
                    if current_content:
                        # Check if content is multimodal format
                        if isinstance(current_content, list):
                            # Handle multimodal content (array format)
                            # Initialize accumulated content as array if not already
                            if not isinstance(accumulated_data[choice_idx]['content'], list):
                                accumulated_data[choice_idx]['content'] = []

                            # Ensure accumulated content list has enough elements
                            while len(accumulated_data[choice_idx]['content']) < len(current_content):
                                accumulated_data[choice_idx]['content'].append({'text': ''})

                            # Merge each content element; only 'text' items
                            # are accumulated (other modalities pass through).
                            for content_idx, content_item in enumerate(current_content):
                                if isinstance(content_item, dict) and 'text' in content_item:
                                    if content_item['text']:
                                        # Accumulate text content
                                        accumulated_data[choice_idx]['content'][content_idx]['text'] += content_item['text']
                            # Update the current response with accumulated content
                            for content_idx in range(len(accumulated_data[choice_idx]['content'])):
                                if content_idx < len(choice.message.content):
                                    choice.message.content[content_idx]['text'] = accumulated_data[choice_idx]['content'][content_idx]['text']
                        else:
                            # Handle regular content (string format)
                            # Initialize accumulated content as string
                            if isinstance(accumulated_data[choice_idx]['content'], list):
                                accumulated_data[choice_idx]['content'] = ''
                            # Accumulate content if not empty
                            accumulated_data[choice_idx]['content'] += current_content
                    # Always set accumulated content back to response
                    if not isinstance(accumulated_data[choice_idx]['content'], list):
                        choice.message.content = accumulated_data[choice_idx]['content']
                    else:
                        # For multimodal content, ensure message.content
                        # exists
                        if not isinstance(choice.message.content, list):
                            choice.message.content = accumulated_data[choice_idx]['content']

                # Handle reasoning_content accumulation
                if 'reasoning_content' in choice.message:
                    current_reasoning_content = choice.message.reasoning_content
                    if current_reasoning_content:
                        accumulated_data[choice_idx]['reasoning_content'] += current_reasoning_content
                # Always set the accumulated reasoning_content back if we
                # have any, even if current response doesn't have it
                if accumulated_data[choice_idx]['reasoning_content']:
                    choice.message.reasoning_content = accumulated_data[choice_idx]['reasoning_content']

                # Handle tool_calls accumulation
                if 'tool_calls' in choice.message and choice.message.tool_calls:
                    current_tool_calls = choice.message.tool_calls

                    # For each current tool call, accumulate its arguments
                    # (streamed tool calls arrive as fragments keyed by
                    # the call's 'index').
                    for current_call in current_tool_calls:
                        if isinstance(current_call, dict) and 'index' in current_call:
                            idx = current_call['index']

                            # Find existing accumulated call with same index
                            existing_call = None
                            for acc_call in accumulated_data[choice_idx]['tool_calls']:
                                if (isinstance(acc_call, dict) and
                                        acc_call.get('index') == idx):
                                    existing_call = acc_call
                                    break

                            if existing_call:
                                # Accumulate function fields from current call
                                if ('function' in current_call and
                                        current_call['function']):
                                    if 'function' not in existing_call:
                                        existing_call['function'] = {}

                                    # Accumulate function.name
                                    if 'name' in current_call['function']:
                                        if 'name' not in existing_call['function']:
                                            existing_call['function']['name'] = ''
                                        existing_call['function']['name'] += current_call['function']['name']

                                    # Accumulate function.arguments
                                    if 'arguments' in current_call['function']:
                                        if 'arguments' not in existing_call['function']:
                                            existing_call['function']['arguments'] = ''
                                        existing_call['function']['arguments'] += current_call['function']['arguments']

                                # Update other fields with latest values
                                # (truthy values only, so empty fragments do
                                # not clobber accumulated state).
                                existing_call.update({k: v for k, v in current_call.items()
                                                      if k != 'function' and v})
                                if 'function' in current_call and current_call['function']:
                                    existing_call['function'].update({k: v for k, v in current_call['function'].items()
                                                                      if k not in ['arguments', 'name'] and v})
                            else:
                                # Add new tool call
                                accumulated_data[choice_idx]['tool_calls'].append(dict(current_call))

                    # Update choice with accumulated tool_calls
                    choice.message.tool_calls = accumulated_data[choice_idx]['tool_calls']
                elif accumulated_data[choice_idx]['tool_calls']:
                    # If current response has no tool_calls but we have
                    # accumulated tool_calls, restore them
                    choice.message.tool_calls = accumulated_data[choice_idx]['tool_calls']

                # Restore role if we have it
                if accumulated_data[choice_idx]['role'] and (not hasattr(choice.message, 'role') or not choice.message.role):
                    choice.message.role = accumulated_data[choice_idx]['role']

            # Handle logprobs accumulation (only if logprobs exists)
            try:
                if ('logprobs' in choice and choice.logprobs and
                        isinstance(choice.logprobs, dict) and 'content' in choice.logprobs):
                    current_logprobs_content = choice.logprobs['content']
                    if current_logprobs_content and isinstance(current_logprobs_content, list):
                        # Initialize logprobs content if not exists
                        if 'logprobs' not in accumulated_data[choice_idx]:
                            accumulated_data[choice_idx]['logprobs'] = {'content': []}
                        elif 'content' not in accumulated_data[choice_idx]['logprobs']:
                            accumulated_data[choice_idx]['logprobs']['content'] = []

                        # Extend the accumulated logprobs content array
                        accumulated_data[choice_idx]['logprobs']['content'].extend(current_logprobs_content)
            except (KeyError, AttributeError, TypeError):
                # logprobs field might not exist or be in unexpected format, safely skip
                pass

            # Always set accumulated logprobs if we have any
            if (accumulated_data[choice_idx]['logprobs']['content'] and
                    hasattr(choice, 'logprobs') and choice.logprobs):
                choice.logprobs['content'] = accumulated_data[choice_idx][
                    'logprobs']['content']

            # Handle finish_reason for n > 1 case
            # NOTE(review): the sentinel is the STRING 'null', not None —
            # presumably what the service emits for unfinished choices.
            if (n > 1 and hasattr(choice, 'finish_reason') and
                    choice.finish_reason and
                    choice.finish_reason != 'null'):
                accumulated_data[choice_idx]['finish_reason'] = \
                    choice.finish_reason
                accumulated_data[choice_idx]['finished'] = True

        # Handle n > 1 case: different strategies for different finish_reason
        if n > 1:
            # Count finished choices
            finished_count = sum(1 for data in accumulated_data.values()
                                 if isinstance(data, dict) and
                                 data.get('finished', False))

            # Find all finished choices in current packet
            finished_choices_in_packet = []
            for choice in choices:
                if (hasattr(choice, 'finish_reason') and
                        choice.finish_reason and
                        choice.finish_reason != 'null'):
                    choice_idx = (choice.index if hasattr(choice, 'index') and
                                  'index' in choice else 0)
                    finish_reason = choice.finish_reason
                    finished_choices_in_packet.append(
                        (choice_idx, finish_reason, choice))

            # No finish_reason in current packet: return as is
            if not finished_choices_in_packet:
                return True

            # Get finish_reason type from first finished choice
            first_finish_reason = finished_choices_in_packet[0][1]

            # For stop: wait all choices, then merge into one result
            if first_finish_reason == 'stop':
                if finished_count < n:
                    # Hide finish_reason until all finished
                    for choice in choices:
                        if (hasattr(choice, 'finish_reason') and
                                choice.finish_reason and
                                choice.finish_reason != 'null'):
                            choice.finish_reason = 'null'
                else:
                    # All finished: merge all choices into one result
                    for data in accumulated_data.values():
                        if isinstance(data, dict) and 'all_choices_sent' in data:
                            data['all_choices_sent'] = True

                    # Return final result with all choices
                    all_choices = []
                    # Sort by choice_idx to ensure correct order
                    sorted_items = sorted(
                        [(idx, data) for idx, data in accumulated_data.items()
                         if isinstance(data, dict) and 'finished' in data],
                        key=lambda x: x[0]
                    )

                    for choice_idx, data in sorted_items:
                        # Create a new choice object (plain dict)
                        final_choice_dict = {
                            'index': choice_idx,
                            'finish_reason': data['finish_reason']
                        }

                        # Create message
                        message_dict = {
                            'role': data['role'] if data['role'] else 'assistant'
                        }
                        if data['content']:
                            message_dict['content'] = (
                                data['content'] if isinstance(data['content'], str)
                                else data['content'])
                        if data['reasoning_content']:
                            message_dict['reasoning_content'] = data['reasoning_content']
                        if data['tool_calls']:
                            message_dict['tool_calls'] = data['tool_calls']

                        final_choice_dict['message'] = message_dict

                        # Add logprobs if present
                        if data['logprobs']['content']:
                            final_choice_dict['logprobs'] = {
                                'content': data['logprobs']['content']
                            }

                        all_choices.append(final_choice_dict)

                    # Update output choices with all accumulated choices
                    parsed_response.output.choices = all_choices

                    # Aggregate usage from all choice indices
                    if 'usage_by_index' in accumulated_data and accumulated_data[
                            'usage_by_index']:
                        aggregated_usage = {}
                        usage_by_idx = accumulated_data['usage_by_index']

                        # Sum output_tokens and recalculate total_tokens
                        total_output_tokens = 0
                        input_tokens = None
                        prompt_tokens_details = None

                        for idx, usage in usage_by_idx.items():
                            if 'output_tokens' in usage:
                                total_output_tokens += usage['output_tokens']
                            # input_tokens should be the same for all indices
                            if input_tokens is None and 'input_tokens' in usage:
                                input_tokens = usage['input_tokens']
                            # Keep prompt_tokens_details from any index
                            # (should be same)
                            if (prompt_tokens_details is None and
                                    'prompt_tokens_details' in usage):
                                prompt_tokens_details = usage[
                                    'prompt_tokens_details']

                        # Build aggregated usage
                        if input_tokens is not None:
                            aggregated_usage['input_tokens'] = input_tokens
                        aggregated_usage['output_tokens'] = total_output_tokens
                        if input_tokens is not None:
                            aggregated_usage['total_tokens'] = (
                                input_tokens + total_output_tokens)
                        if prompt_tokens_details is not None:
                            aggregated_usage['prompt_tokens_details'] = (
                                prompt_tokens_details)

                        # Update response usage with aggregated values
                        parsed_response.usage = aggregated_usage
            else:
                # For non-stop (e.g., tool_calls): output each choice separately
                responses_to_yield = []

                for choice_idx, finish_reason, choice in finished_choices_in_packet:
                    current_data = accumulated_data.get(choice_idx)
                    if (current_data is None or
                            current_data.get('all_choices_sent', False)):
                        continue

                    # Mark this choice as delivered so later packets for it
                    # are filtered.
                    current_data['all_choices_sent'] = True

                    # Create a new response for this choice
                    if responses_to_yield:
                        # Clone the response for additional choices
                        new_response = copy.deepcopy(parsed_response)
                    else:
                        # Use the original response for the first choice
                        new_response = parsed_response

                    # Deep copy choice to avoid modifying accumulated_data
                    choice_copy = copy.deepcopy(choice)

                    # Set only this choice in the response
                    new_response.output.choices = [choice_copy]

                    # Update usage with this choice's output tokens
                    if (new_response.usage and
                            'usage_by_index' in accumulated_data and
                            choice_idx in accumulated_data['usage_by_index']):
                        current_usage = accumulated_data['usage_by_index'][
                            choice_idx]
                        if 'output_tokens' in current_usage:
                            new_response.usage['output_tokens'] = (
                                current_usage['output_tokens'])
                        if 'input_tokens' in current_usage:
                            new_response.usage['total_tokens'] = (
                                current_usage['input_tokens'] +
                                current_usage['output_tokens'])

                    responses_to_yield.append(new_response)

                # Return list of responses if we have any
                if responses_to_yield:
                    return responses_to_yield
                else:
                    return False

    return True
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def merge_multimodal_single_response(parsed_response, accumulated_data, n=1):
|
|
425
|
+
"""Merge a single response chunk with accumulated data.
|
|
426
|
+
|
|
427
|
+
Args:
|
|
428
|
+
parsed_response: The response chunk to merge
|
|
429
|
+
accumulated_data: Dictionary storing accumulated data for each choice
|
|
430
|
+
n: Number of expected choices (default 1)
|
|
431
|
+
|
|
432
|
+
Returns:
|
|
433
|
+
bool: True if this response should be yielded, False if filtered
|
|
434
|
+
"""
|
|
435
|
+
# Check if all choices have been sent (for n > 1 case)
|
|
436
|
+
if n > 1 and accumulated_data:
|
|
437
|
+
all_sent = any(data.get('all_choices_sent', False)
|
|
438
|
+
for data in accumulated_data.values())
|
|
439
|
+
if all_sent:
|
|
440
|
+
return False
|
|
441
|
+
|
|
442
|
+
# Track usage for each choice index when n > 1
|
|
443
|
+
# Each streaming packet contains usage info for one specific choice
|
|
444
|
+
if (n > 1 and parsed_response.usage and
|
|
445
|
+
parsed_response.output and parsed_response.output.choices and
|
|
446
|
+
len(parsed_response.output.choices) > 0):
|
|
447
|
+
if 'usage_by_index' not in accumulated_data:
|
|
448
|
+
accumulated_data['usage_by_index'] = {}
|
|
449
|
+
|
|
450
|
+
# Get the choice index from the first (and typically only) choice in this packet
|
|
451
|
+
try:
|
|
452
|
+
first_choice = parsed_response.output.choices[0]
|
|
453
|
+
choice_idx = first_choice.index if hasattr(
|
|
454
|
+
first_choice, 'index') and 'index' in first_choice else 0
|
|
455
|
+
|
|
456
|
+
# Store only output_tokens for this choice index
|
|
457
|
+
if 'output_tokens' in parsed_response.usage:
|
|
458
|
+
accumulated_data['usage_by_index'][choice_idx] = dict(
|
|
459
|
+
parsed_response.usage)
|
|
460
|
+
except (KeyError, AttributeError, IndexError):
|
|
461
|
+
pass
|
|
462
|
+
|
|
463
|
+
# Handle output.text accumulation when choices is null
|
|
464
|
+
if (parsed_response.output and
|
|
465
|
+
hasattr(parsed_response.output, 'text') and
|
|
466
|
+
(not parsed_response.output.choices or parsed_response.output.choices is None)):
|
|
467
|
+
choice_idx = 0
|
|
468
|
+
if choice_idx not in accumulated_data:
|
|
469
|
+
accumulated_data[choice_idx] = {
|
|
470
|
+
'content': '',
|
|
471
|
+
'reasoning_content': '',
|
|
472
|
+
'tool_calls': [],
|
|
473
|
+
'logprobs': {'content': []},
|
|
474
|
+
'finished': False,
|
|
475
|
+
'finish_reason': None,
|
|
476
|
+
'all_choices_sent': False,
|
|
477
|
+
'role': None
|
|
478
|
+
}
|
|
479
|
+
# Accumulate text if not empty
|
|
480
|
+
if parsed_response.output.text:
|
|
481
|
+
accumulated_data[choice_idx]['content'] += parsed_response.output.text
|
|
482
|
+
# Always set accumulated content back to response
|
|
483
|
+
parsed_response.output.text = accumulated_data[choice_idx]['content']
|
|
484
|
+
return True
|
|
485
|
+
|
|
486
|
+
# Process each choice in the choices array
|
|
487
|
+
if parsed_response.output and parsed_response.output.choices:
|
|
488
|
+
choices = parsed_response.output.choices
|
|
489
|
+
|
|
490
|
+
# Filter out empty choices array
|
|
491
|
+
if not choices:
|
|
492
|
+
return False
|
|
493
|
+
|
|
494
|
+
for choice_enum_idx, choice in enumerate(choices):
|
|
495
|
+
# Use choice.index if available, otherwise use enumerate index
|
|
496
|
+
try:
|
|
497
|
+
choice_idx = choice.index if hasattr(choice, 'index') and 'index' in choice else choice_enum_idx
|
|
498
|
+
except (KeyError, AttributeError):
|
|
499
|
+
choice_idx = choice_enum_idx
|
|
500
|
+
|
|
501
|
+
# Initialize accumulated data for this choice if not exists
|
|
502
|
+
if choice_idx not in accumulated_data:
|
|
503
|
+
accumulated_data[choice_idx] = {
|
|
504
|
+
'content': '',
|
|
505
|
+
'reasoning_content': '',
|
|
506
|
+
'tool_calls': [],
|
|
507
|
+
'logprobs': {'content': []},
|
|
508
|
+
'finished': False,
|
|
509
|
+
'finish_reason': None,
|
|
510
|
+
'all_choices_sent': False,
|
|
511
|
+
'role': None
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
# Handle message field - create if null
|
|
515
|
+
if not choice.message:
|
|
516
|
+
# Create message object with accumulated data
|
|
517
|
+
choice.message = {
|
|
518
|
+
'role': accumulated_data[choice_idx]['role'] if accumulated_data[choice_idx]['role'] else 'assistant',
|
|
519
|
+
'content': accumulated_data[choice_idx]['content']
|
|
520
|
+
}
|
|
521
|
+
if accumulated_data[choice_idx]['reasoning_content']:
|
|
522
|
+
choice.message['reasoning_content'] = accumulated_data[choice_idx]['reasoning_content']
|
|
523
|
+
if accumulated_data[choice_idx]['tool_calls']:
|
|
524
|
+
choice.message['tool_calls'] = accumulated_data[choice_idx]['tool_calls']
|
|
525
|
+
else:
|
|
526
|
+
# Save role if present
|
|
527
|
+
if hasattr(choice.message, 'role') and choice.message.role:
|
|
528
|
+
accumulated_data[choice_idx]['role'] = choice.message.role
|
|
529
|
+
|
|
530
|
+
# Handle content accumulation
|
|
531
|
+
if 'content' in choice.message:
|
|
532
|
+
current_content = choice.message.content
|
|
533
|
+
# Check if content is multimodal format
|
|
534
|
+
if isinstance(current_content, list):
|
|
535
|
+
# Handle multimodal content (array format)
|
|
536
|
+
# Initialize accumulated content as array if not already
|
|
537
|
+
if not isinstance(accumulated_data[choice_idx]['content'], list):
|
|
538
|
+
accumulated_data[choice_idx]['content'] = []
|
|
539
|
+
|
|
540
|
+
# Only process if current_content is not empty
|
|
541
|
+
if current_content:
|
|
542
|
+
# Ensure accumulated content list has enough elements
|
|
543
|
+
while len(accumulated_data[choice_idx]['content']) < len(current_content):
|
|
544
|
+
accumulated_data[choice_idx]['content'].append({'text': ''})
|
|
545
|
+
|
|
546
|
+
# Merge each content element
|
|
547
|
+
for content_idx, content_item in enumerate(current_content):
|
|
548
|
+
if isinstance(content_item, dict) and 'text' in content_item:
|
|
549
|
+
if content_item['text']:
|
|
550
|
+
# Accumulate text content
|
|
551
|
+
accumulated_data[choice_idx]['content'][content_idx]['text'] += content_item['text']
|
|
552
|
+
|
|
553
|
+
# Always set accumulated content back to response
|
|
554
|
+
choice.message.content = accumulated_data[choice_idx]['content']
|
|
555
|
+
elif current_content:
|
|
556
|
+
# Handle regular content (string format)
|
|
557
|
+
# Initialize accumulated content as string
|
|
558
|
+
if isinstance(accumulated_data[choice_idx]['content'], list):
|
|
559
|
+
accumulated_data[choice_idx]['content'] = ''
|
|
560
|
+
# Accumulate content if not empty
|
|
561
|
+
accumulated_data[choice_idx]['content'] += current_content
|
|
562
|
+
# Set accumulated content back to response
|
|
563
|
+
choice.message.content = accumulated_data[choice_idx]['content']
|
|
564
|
+
elif not current_content and accumulated_data[choice_idx]['content']:
|
|
565
|
+
# Current content is empty but we have accumulated content, restore it
|
|
566
|
+
choice.message.content = accumulated_data[choice_idx]['content']
|
|
567
|
+
|
|
568
|
+
# Handle reasoning_content accumulation
|
|
569
|
+
if 'reasoning_content' in choice.message:
|
|
570
|
+
current_reasoning_content = choice.message.reasoning_content
|
|
571
|
+
if current_reasoning_content:
|
|
572
|
+
accumulated_data[choice_idx]['reasoning_content'] += current_reasoning_content
|
|
573
|
+
# Always set the accumulated reasoning_content back if we
|
|
574
|
+
# have any, even if current response doesn't have it
|
|
575
|
+
if accumulated_data[choice_idx]['reasoning_content']:
|
|
576
|
+
choice.message.reasoning_content = accumulated_data[choice_idx]['reasoning_content']
|
|
577
|
+
|
|
578
|
+
# Handle tool_calls accumulation
|
|
579
|
+
if 'tool_calls' in choice.message and choice.message.tool_calls:
|
|
580
|
+
current_tool_calls = choice.message.tool_calls
|
|
581
|
+
|
|
582
|
+
# For each current tool call, accumulate its arguments
|
|
583
|
+
for current_call in current_tool_calls:
|
|
584
|
+
if isinstance(current_call, dict) and 'index' in current_call:
|
|
585
|
+
idx = current_call['index']
|
|
586
|
+
|
|
587
|
+
# Find existing accumulated call with same index
|
|
588
|
+
existing_call = None
|
|
589
|
+
for acc_call in accumulated_data[choice_idx]['tool_calls']:
|
|
590
|
+
if (isinstance(acc_call, dict) and
|
|
591
|
+
acc_call.get('index') == idx):
|
|
592
|
+
existing_call = acc_call
|
|
593
|
+
break
|
|
594
|
+
|
|
595
|
+
if existing_call:
|
|
596
|
+
# Accumulate function fields from current call
|
|
597
|
+
if ('function' in current_call and
|
|
598
|
+
current_call['function']):
|
|
599
|
+
if 'function' not in existing_call:
|
|
600
|
+
existing_call['function'] = {}
|
|
601
|
+
|
|
602
|
+
# Accumulate function.name
|
|
603
|
+
if 'name' in current_call['function']:
|
|
604
|
+
if 'name' not in existing_call['function']:
|
|
605
|
+
existing_call['function']['name'] = ''
|
|
606
|
+
existing_call['function']['name'] += current_call['function']['name']
|
|
607
|
+
|
|
608
|
+
# Accumulate function.arguments
|
|
609
|
+
if 'arguments' in current_call['function']:
|
|
610
|
+
if 'arguments' not in existing_call['function']:
|
|
611
|
+
existing_call['function']['arguments'] = ''
|
|
612
|
+
existing_call['function']['arguments'] += current_call['function']['arguments']
|
|
613
|
+
|
|
614
|
+
# Update other fields with latest values
|
|
615
|
+
existing_call.update({k: v for k, v in current_call.items()
|
|
616
|
+
if k != 'function' and v})
|
|
617
|
+
if 'function' in current_call and current_call['function']:
|
|
618
|
+
existing_call['function'].update({k: v for k, v in current_call['function'].items()
|
|
619
|
+
if k not in ['arguments', 'name'] and v})
|
|
620
|
+
else:
|
|
621
|
+
# Add new tool call
|
|
622
|
+
accumulated_data[choice_idx]['tool_calls'].append(dict(current_call))
|
|
623
|
+
|
|
624
|
+
# Update choice with accumulated tool_calls
|
|
625
|
+
choice.message.tool_calls = accumulated_data[choice_idx]['tool_calls']
|
|
626
|
+
elif accumulated_data[choice_idx]['tool_calls']:
|
|
627
|
+
# If current response has no tool_calls but we have accumulated tool_calls, restore them
|
|
628
|
+
choice.message.tool_calls = accumulated_data[choice_idx]['tool_calls']
|
|
629
|
+
|
|
630
|
+
# Restore role if we have it
|
|
631
|
+
if accumulated_data[choice_idx]['role'] and (not hasattr(choice.message, 'role') or not choice.message.role):
|
|
632
|
+
choice.message.role = accumulated_data[choice_idx]['role']
|
|
633
|
+
|
|
634
|
+
# Handle logprobs accumulation (only if logprobs exists)
|
|
635
|
+
try:
|
|
636
|
+
if ('logprobs' in choice and choice.logprobs and
|
|
637
|
+
isinstance(choice.logprobs, dict) and 'content' in choice.logprobs):
|
|
638
|
+
current_logprobs_content = choice.logprobs['content']
|
|
639
|
+
if current_logprobs_content and isinstance(current_logprobs_content, list):
|
|
640
|
+
# Initialize logprobs content if not exists
|
|
641
|
+
if 'logprobs' not in accumulated_data[choice_idx]:
|
|
642
|
+
accumulated_data[choice_idx]['logprobs'] = {'content': []}
|
|
643
|
+
elif 'content' not in accumulated_data[choice_idx]['logprobs']:
|
|
644
|
+
accumulated_data[choice_idx]['logprobs']['content'] = []
|
|
645
|
+
|
|
646
|
+
# Extend the accumulated logprobs content array
|
|
647
|
+
accumulated_data[choice_idx]['logprobs']['content'].extend(current_logprobs_content)
|
|
648
|
+
except (KeyError, AttributeError, TypeError):
|
|
649
|
+
# logprobs field might not exist or be in unexpected format, safely skip
|
|
650
|
+
pass
|
|
651
|
+
|
|
652
|
+
# Always set accumulated logprobs if we have any
|
|
653
|
+
if (accumulated_data[choice_idx]['logprobs']['content'] and
|
|
654
|
+
hasattr(choice, 'logprobs') and choice.logprobs):
|
|
655
|
+
choice.logprobs['content'] = accumulated_data[choice_idx][
|
|
656
|
+
'logprobs']['content']
|
|
657
|
+
|
|
658
|
+
# Handle finish_reason for n > 1 case
|
|
659
|
+
if (n > 1 and hasattr(choice, 'finish_reason') and
|
|
660
|
+
choice.finish_reason and
|
|
661
|
+
choice.finish_reason != 'null'):
|
|
662
|
+
accumulated_data[choice_idx]['finish_reason'] = \
|
|
663
|
+
choice.finish_reason
|
|
664
|
+
accumulated_data[choice_idx]['finished'] = True
|
|
665
|
+
|
|
666
|
+
# Handle n > 1 case: different strategies for different
|
|
667
|
+
# finish_reason
|
|
668
|
+
if n > 1:
|
|
669
|
+
# Count finished choices
|
|
670
|
+
finished_count = sum(1 for data in accumulated_data.values()
|
|
671
|
+
if isinstance(data, dict) and
|
|
672
|
+
data.get('finished', False))
|
|
673
|
+
|
|
674
|
+
# Find all finished choices in current packet
|
|
675
|
+
finished_choices_in_packet = []
|
|
676
|
+
for choice in choices:
|
|
677
|
+
if (hasattr(choice, 'finish_reason') and
|
|
678
|
+
choice.finish_reason and
|
|
679
|
+
choice.finish_reason != 'null'):
|
|
680
|
+
choice_idx = (choice.index if hasattr(choice, 'index') and
|
|
681
|
+
'index' in choice else 0)
|
|
682
|
+
finish_reason = choice.finish_reason
|
|
683
|
+
finished_choices_in_packet.append(
|
|
684
|
+
(choice_idx, finish_reason, choice))
|
|
685
|
+
|
|
686
|
+
# No finish_reason in current packet: return as is
|
|
687
|
+
if not finished_choices_in_packet:
|
|
688
|
+
return True
|
|
689
|
+
|
|
690
|
+
# Get finish_reason type from first finished choice
|
|
691
|
+
first_finish_reason = finished_choices_in_packet[0][1]
|
|
692
|
+
|
|
693
|
+
# For stop: wait all choices, then merge into one result
|
|
694
|
+
if first_finish_reason == 'stop':
|
|
695
|
+
if finished_count < n:
|
|
696
|
+
# Hide finish_reason until all finished
|
|
697
|
+
for choice in choices:
|
|
698
|
+
if (hasattr(choice, 'finish_reason') and
|
|
699
|
+
choice.finish_reason and
|
|
700
|
+
choice.finish_reason != 'null'):
|
|
701
|
+
choice.finish_reason = 'null'
|
|
702
|
+
else:
|
|
703
|
+
# All finished: merge all choices into one result
|
|
704
|
+
for data in accumulated_data.values():
|
|
705
|
+
if isinstance(data, dict) and 'all_choices_sent' in data:
|
|
706
|
+
data['all_choices_sent'] = True
|
|
707
|
+
|
|
708
|
+
# Return final result with all choices
|
|
709
|
+
all_choices = []
|
|
710
|
+
# Sort by choice_idx to ensure correct order
|
|
711
|
+
sorted_items = sorted(
|
|
712
|
+
[(idx, data) for idx, data in accumulated_data.items()
|
|
713
|
+
if isinstance(data, dict) and 'finished' in data],
|
|
714
|
+
key=lambda x: x[0]
|
|
715
|
+
)
|
|
716
|
+
|
|
717
|
+
for choice_idx, data in sorted_items:
|
|
718
|
+
# Create a new choice object
|
|
719
|
+
final_choice_dict = {
|
|
720
|
+
'index': choice_idx,
|
|
721
|
+
'finish_reason': data['finish_reason']
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
# Create message
|
|
725
|
+
message_dict = {
|
|
726
|
+
'role': data['role'] if data['role'] else 'assistant'
|
|
727
|
+
}
|
|
728
|
+
if data['content']:
|
|
729
|
+
message_dict['content'] = (
|
|
730
|
+
data['content'] if isinstance(data['content'],
|
|
731
|
+
str)
|
|
732
|
+
else data['content'])
|
|
733
|
+
if data['reasoning_content']:
|
|
734
|
+
message_dict['reasoning_content'] = (
|
|
735
|
+
data['reasoning_content'])
|
|
736
|
+
if data['tool_calls']:
|
|
737
|
+
message_dict['tool_calls'] = data['tool_calls']
|
|
738
|
+
|
|
739
|
+
final_choice_dict['message'] = message_dict
|
|
740
|
+
|
|
741
|
+
# Add logprobs if present
|
|
742
|
+
if data['logprobs']['content']:
|
|
743
|
+
final_choice_dict['logprobs'] = {
|
|
744
|
+
'content': data['logprobs']['content']
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
all_choices.append(final_choice_dict)
|
|
748
|
+
|
|
749
|
+
# Update output choices with all accumulated choices
|
|
750
|
+
parsed_response.output.choices = all_choices
|
|
751
|
+
|
|
752
|
+
# Aggregate usage from all choice indices
|
|
753
|
+
if 'usage_by_index' in accumulated_data and accumulated_data[
|
|
754
|
+
'usage_by_index']:
|
|
755
|
+
aggregated_usage = {}
|
|
756
|
+
usage_by_idx = accumulated_data['usage_by_index']
|
|
757
|
+
|
|
758
|
+
# Sum output_tokens and recalculate total_tokens
|
|
759
|
+
total_output_tokens = 0
|
|
760
|
+
input_tokens = None
|
|
761
|
+
prompt_tokens_details = None
|
|
762
|
+
|
|
763
|
+
for idx, usage in usage_by_idx.items():
|
|
764
|
+
if 'output_tokens' in usage:
|
|
765
|
+
total_output_tokens += usage['output_tokens']
|
|
766
|
+
# input_tokens should be the same for all indices
|
|
767
|
+
if input_tokens is None and 'input_tokens' in usage:
|
|
768
|
+
input_tokens = usage['input_tokens']
|
|
769
|
+
# Keep prompt_tokens_details from any index
|
|
770
|
+
# (should be same)
|
|
771
|
+
if (prompt_tokens_details is None and
|
|
772
|
+
'prompt_tokens_details' in usage):
|
|
773
|
+
prompt_tokens_details = usage[
|
|
774
|
+
'prompt_tokens_details']
|
|
775
|
+
|
|
776
|
+
# Build aggregated usage
|
|
777
|
+
if input_tokens is not None:
|
|
778
|
+
aggregated_usage['input_tokens'] = input_tokens
|
|
779
|
+
aggregated_usage['output_tokens'] = total_output_tokens
|
|
780
|
+
if input_tokens is not None:
|
|
781
|
+
aggregated_usage['total_tokens'] = (
|
|
782
|
+
input_tokens + total_output_tokens)
|
|
783
|
+
if prompt_tokens_details is not None:
|
|
784
|
+
aggregated_usage['prompt_tokens_details'] = (
|
|
785
|
+
prompt_tokens_details)
|
|
786
|
+
|
|
787
|
+
# Update response usage with aggregated values
|
|
788
|
+
parsed_response.usage = aggregated_usage
|
|
789
|
+
else:
|
|
790
|
+
# For non-stop (e.g., tool_calls): output each choice
|
|
791
|
+
# separately
|
|
792
|
+
responses_to_yield = []
|
|
793
|
+
|
|
794
|
+
for choice_idx, finish_reason, choice in finished_choices_in_packet:
|
|
795
|
+
current_data = accumulated_data.get(choice_idx)
|
|
796
|
+
if (current_data is None or
|
|
797
|
+
current_data.get('all_choices_sent', False)):
|
|
798
|
+
continue
|
|
799
|
+
|
|
800
|
+
current_data['all_choices_sent'] = True
|
|
801
|
+
|
|
802
|
+
# Create a new response for this choice
|
|
803
|
+
if responses_to_yield:
|
|
804
|
+
# Clone the response for additional choices
|
|
805
|
+
new_response = copy.deepcopy(parsed_response)
|
|
806
|
+
else:
|
|
807
|
+
# Use the original response for the first choice
|
|
808
|
+
new_response = parsed_response
|
|
809
|
+
|
|
810
|
+
# Deep copy choice to avoid modifying accumulated_data
|
|
811
|
+
choice_copy = copy.deepcopy(choice)
|
|
812
|
+
|
|
813
|
+
# Set only this choice in the response
|
|
814
|
+
new_response.output.choices = [choice_copy]
|
|
815
|
+
|
|
816
|
+
# Update usage with this choice's output tokens
|
|
817
|
+
if (new_response.usage and
|
|
818
|
+
'usage_by_index' in accumulated_data and
|
|
819
|
+
choice_idx in accumulated_data['usage_by_index']):
|
|
820
|
+
current_usage = accumulated_data['usage_by_index'][
|
|
821
|
+
choice_idx]
|
|
822
|
+
if 'output_tokens' in current_usage:
|
|
823
|
+
new_response.usage['output_tokens'] = (
|
|
824
|
+
current_usage['output_tokens'])
|
|
825
|
+
if 'input_tokens' in current_usage:
|
|
826
|
+
new_response.usage['total_tokens'] = (
|
|
827
|
+
current_usage['input_tokens'] +
|
|
828
|
+
current_usage['output_tokens'])
|
|
829
|
+
|
|
830
|
+
responses_to_yield.append(new_response)
|
|
831
|
+
|
|
832
|
+
# Return list of responses if we have any
|
|
833
|
+
if responses_to_yield:
|
|
834
|
+
return responses_to_yield
|
|
835
|
+
else:
|
|
836
|
+
return False
|
|
837
|
+
|
|
838
|
+
return True
|