promptlayer 1.0.59__py3-none-any.whl → 1.0.61__py3-none-any.whl
This diff compares the contents of publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of promptlayer might be problematic.
- promptlayer/__init__.py +1 -1
- promptlayer/promptlayer.py +7 -2
- promptlayer/promptlayer_mixins.py +14 -12
- promptlayer/streaming/__init__.py +54 -0
- promptlayer/streaming/blueprint_builder.py +139 -0
- promptlayer/streaming/response_handlers.py +550 -0
- promptlayer/streaming/stream_processor.py +100 -0
- promptlayer/utils.py +1 -581
- {promptlayer-1.0.59.dist-info → promptlayer-1.0.61.dist-info}/METADATA +1 -1
- promptlayer-1.0.61.dist-info/RECORD +22 -0
- promptlayer-1.0.59.dist-info/RECORD +0 -18
- {promptlayer-1.0.59.dist-info → promptlayer-1.0.61.dist-info}/LICENSE +0 -0
- {promptlayer-1.0.59.dist-info → promptlayer-1.0.61.dist-info}/WHEEL +0 -0
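
The new promptlayer/streaming package groups the stream-aggregation helpers into dedicated modules; the +550-line response_handlers.py hunk below, alongside the 581 lines dropped from promptlayer/utils.py, suggests this logic was moved out of utils.py rather than written from scratch. As rough orientation for that hunk, here is a minimal sketch of driving the synchronous OpenAI handler. The OpenAI client setup and model name are assumptions and are not part of this diff; only openai_stream_chat and its module path come from this release.

# Hypothetical usage sketch, not part of the diff.
from openai import OpenAI

from promptlayer.streaming.response_handlers import openai_stream_chat

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
chunks = list(
    client.chat.completions.create(
        model="gpt-4o-mini",  # assumed model name
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
)
# Fold the streamed ChatCompletionChunk objects into a single ChatCompletion
# with merged content and tool_calls.
response = openai_stream_chat(chunks)
print(response.choices[0].message.content)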
promptlayer/streaming/response_handlers.py
@@ -0,0 +1,550 @@
+"""
+Response handlers for different LLM providers
+
+This module contains handlers that process streaming responses from various
+LLM providers and return both the final response and prompt blueprint.
+"""
+
+from typing import Any, AsyncIterable, List
+
+
+def openai_stream_chat(results: list):
+    """Process OpenAI streaming chat results and return response + blueprint"""
+    from openai.types.chat import (
+        ChatCompletion,
+        ChatCompletionChunk,
+        ChatCompletionMessage,
+        ChatCompletionMessageToolCall,
+    )
+    from openai.types.chat.chat_completion import Choice
+    from openai.types.chat.chat_completion_message_tool_call import Function
+
+    chat_completion_chunks: List[ChatCompletionChunk] = results
+    response: ChatCompletion = ChatCompletion(
+        id="",
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(role="assistant"),
+            )
+        ],
+        created=0,
+        model="",
+    )
+    last_result = chat_completion_chunks[-1]
+    response.id = last_result.id
+    response.created = last_result.created
+    response.model = last_result.model
+    response.system_fingerprint = last_result.system_fingerprint
+    response.usage = last_result.usage
+    content = ""
+    tool_calls: List[ChatCompletionMessageToolCall] = []
+
+    for result in chat_completion_chunks:
+        choices = result.choices
+        if len(choices) == 0:
+            continue
+        if choices[0].delta.content:
+            content = f"{content}{result.choices[0].delta.content}"
+
+        delta = choices[0].delta
+        if delta.tool_calls:
+            last_tool_call = None
+            if len(tool_calls) > 0:
+                last_tool_call = tool_calls[-1]
+            tool_call = delta.tool_calls[0]
+            if not tool_call.function:
+                continue
+            if not last_tool_call or tool_call.id:
+                tool_calls.append(
+                    ChatCompletionMessageToolCall(
+                        id=tool_call.id or "",
+                        function=Function(
+                            name=tool_call.function.name or "",
+                            arguments=tool_call.function.arguments or "",
+                        ),
+                        type=tool_call.type or "function",
+                    )
+                )
+                continue
+            last_tool_call.function.name = f"{last_tool_call.function.name}{tool_call.function.name or ''}"
+            last_tool_call.function.arguments = (
+                f"{last_tool_call.function.arguments}{tool_call.function.arguments or ''}"
+            )
+
+    response.choices[0].message.content = content
+    response.choices[0].message.tool_calls = tool_calls if tool_calls else None
+    return response
+
+
+async def aopenai_stream_chat(generator: AsyncIterable[Any]) -> Any:
+    """Async version of openai_stream_chat"""
+    from openai.types.chat import (
+        ChatCompletion,
+        ChatCompletionChunk,
+        ChatCompletionMessage,
+        ChatCompletionMessageToolCall,
+    )
+    from openai.types.chat.chat_completion import Choice
+    from openai.types.chat.chat_completion_message_tool_call import Function
+
+    chat_completion_chunks: List[ChatCompletionChunk] = []
+    response: ChatCompletion = ChatCompletion(
+        id="",
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(role="assistant"),
+            )
+        ],
+        created=0,
+        model="",
+    )
+    content = ""
+    tool_calls: List[ChatCompletionMessageToolCall] = []
+
+    async for result in generator:
+        chat_completion_chunks.append(result)
+        choices = result.choices
+        if len(choices) == 0:
+            continue
+        if choices[0].delta.content:
+            content = f"{content}{choices[0].delta.content}"
+
+        delta = choices[0].delta
+        if delta.tool_calls:
+            last_tool_call = None
+            if len(tool_calls) > 0:
+                last_tool_call = tool_calls[-1]
+            tool_call = delta.tool_calls[0]
+            if not tool_call.function:
+                continue
+            if not last_tool_call or tool_call.id:
+                tool_calls.append(
+                    ChatCompletionMessageToolCall(
+                        id=tool_call.id or "",
+                        function=Function(
+                            name=tool_call.function.name or "",
+                            arguments=tool_call.function.arguments or "",
+                        ),
+                        type=tool_call.type or "function",
+                    )
+                )
+                continue
+            last_tool_call.function.name = f"{last_tool_call.function.name}{tool_call.function.name or ''}"
+            last_tool_call.function.arguments = (
+                f"{last_tool_call.function.arguments}{tool_call.function.arguments or ''}"
+            )
+
+    # After collecting all chunks, set the response attributes
+    if chat_completion_chunks:
+        last_result = chat_completion_chunks[-1]
+        response.id = last_result.id
+        response.created = last_result.created
+        response.model = last_result.model
+        response.system_fingerprint = getattr(last_result, "system_fingerprint", None)
+        response.usage = last_result.usage
+
+    response.choices[0].message.content = content
+    response.choices[0].message.tool_calls = tool_calls if tool_calls else None
+    return response
+
+
+def anthropic_stream_message(results: list):
+    """Process Anthropic streaming message results and return response + blueprint"""
+    from anthropic.types import Message, MessageStreamEvent, Usage
+
+    from promptlayer.utils import build_anthropic_content_blocks
+
+    message_stream_events: List[MessageStreamEvent] = results
+    response: Message = Message(
+        id="",
+        model="",
+        content=[],
+        role="assistant",
+        type="message",
+        stop_reason="stop_sequence",
+        stop_sequence=None,
+        usage=Usage(input_tokens=0, output_tokens=0),
+    )
+
+    for event in message_stream_events:
+        if event.type == "message_start":
+            response = event.message
+            break
+
+    content_blocks, usage, stop_reason = build_anthropic_content_blocks(message_stream_events)
+    response.content = content_blocks
+    if usage:
+        response.usage.output_tokens = usage.output_tokens
+    if stop_reason:
+        response.stop_reason = stop_reason
+
+    return response
+
+
+async def aanthropic_stream_message(generator: AsyncIterable[Any]) -> Any:
+    """Async version of anthropic_stream_message"""
+    from anthropic.types import Message, MessageStreamEvent, Usage
+
+    from promptlayer.utils import build_anthropic_content_blocks
+
+    message_stream_events: List[MessageStreamEvent] = []
+    response: Message = Message(
+        id="",
+        model="",
+        content=[],
+        role="assistant",
+        type="message",
+        stop_reason="stop_sequence",
+        stop_sequence=None,
+        usage=Usage(input_tokens=0, output_tokens=0),
+    )
+
+    async for event in generator:
+        if event.type == "message_start":
+            response = event.message
+        message_stream_events.append(event)
+
+    content_blocks, usage, stop_reason = build_anthropic_content_blocks(message_stream_events)
+    response.content = content_blocks
+    if usage:
+        response.usage.output_tokens = usage.output_tokens
+    if stop_reason:
+        response.stop_reason = stop_reason
+
+    return response
+
+
+def openai_stream_completion(results: list):
+    from openai.types.completion import Completion, CompletionChoice
+
+    completions: List[Completion] = results
+    last_chunk = completions[-1]
+    response = Completion(
+        id=last_chunk.id,
+        created=last_chunk.created,
+        model=last_chunk.model,
+        object="text_completion",
+        choices=[CompletionChoice(finish_reason="stop", index=0, text="")],
+    )
+    text = ""
+    for completion in completions:
+        usage = completion.usage
+        system_fingerprint = completion.system_fingerprint
+        if len(completion.choices) > 0 and completion.choices[0].text:
+            text = f"{text}{completion.choices[0].text}"
+        if usage:
+            response.usage = usage
+        if system_fingerprint:
+            response.system_fingerprint = system_fingerprint
+    response.choices[0].text = text
+    return response
+
+
+async def aopenai_stream_completion(generator: AsyncIterable[Any]) -> Any:
+    from openai.types.completion import Completion, CompletionChoice
+
+    completions: List[Completion] = []
+    text = ""
+    response = Completion(
+        id="",
+        created=0,
+        model="",
+        object="text_completion",
+        choices=[CompletionChoice(finish_reason="stop", index=0, text="")],
+    )
+
+    async for completion in generator:
+        completions.append(completion)
+        usage = completion.usage
+        system_fingerprint = getattr(completion, "system_fingerprint", None)
+        if len(completion.choices) > 0 and completion.choices[0].text:
+            text = f"{text}{completion.choices[0].text}"
+        if usage:
+            response.usage = usage
+        if system_fingerprint:
+            response.system_fingerprint = system_fingerprint
+
+    # After collecting all completions, set the response attributes
+    if completions:
+        last_chunk = completions[-1]
+        response.id = last_chunk.id
+        response.created = last_chunk.created
+        response.model = last_chunk.model
+
+    response.choices[0].text = text
+    return response
+
+
+def anthropic_stream_completion(results: list):
+    from anthropic.types import Completion
+
+    completions: List[Completion] = results
+    last_chunk = completions[-1]
+    response = Completion(
+        id=last_chunk.id,
+        completion="",
+        model=last_chunk.model,
+        stop_reason="stop",
+        type="completion",
+    )
+
+    text = ""
+    for completion in completions:
+        text = f"{text}{completion.completion}"
+    response.completion = text
+    return response
+
+
+async def aanthropic_stream_completion(generator: AsyncIterable[Any]) -> Any:
+    from anthropic.types import Completion
+
+    completions: List[Completion] = []
+    text = ""
+    response = Completion(
+        id="",
+        completion="",
+        model="",
+        stop_reason="stop",
+        type="completion",
+    )
+
+    async for completion in generator:
+        completions.append(completion)
+        text = f"{text}{completion.completion}"
+
+    # After collecting all completions, set the response attributes
+    if completions:
+        last_chunk = completions[-1]
+        response.id = last_chunk.id
+        response.model = last_chunk.model
+
+    response.completion = text
+    return response
+
+
+def _build_google_response_from_parts(thought_content: str, regular_content: str, function_calls: list, last_result):
+    """Helper function to build Google response with thought, regular, and function call parts."""
+    from google.genai.chats import Part
+
+    response = last_result.model_copy()
+    final_parts = []
+
+    if thought_content:
+        thought_part = Part(text=thought_content, thought=True)
+        final_parts.append(thought_part)
+
+    if regular_content:
+        text_part = Part(text=regular_content, thought=None)
+        final_parts.append(text_part)
+
+    for function_call in function_calls:
+        function_part = Part(function_call=function_call, thought=None)
+        final_parts.append(function_part)
+
+    if final_parts:
+        response.candidates[0].content.parts = final_parts
+
+    return response
+
+
+async def amap_google_stream_response(generator: AsyncIterable[Any]):
+    from google.genai.chats import GenerateContentResponse
+
+    response = GenerateContentResponse()
+
+    thought_content = ""
+    regular_content = ""
+    function_calls = []
+    last_result = None
+
+    async for result in generator:
+        last_result = result
+        if result.candidates and result.candidates[0].content.parts:
+            for part in result.candidates[0].content.parts:
+                if hasattr(part, "text") and part.text:
+                    if hasattr(part, "thought") and part.thought:
+                        thought_content = f"{thought_content}{part.text}"
+                    else:
+                        regular_content = f"{regular_content}{part.text}"
+                elif hasattr(part, "function_call") and part.function_call:
+                    function_calls.append(part.function_call)
+
+    if not last_result:
+        return response
+
+    return _build_google_response_from_parts(thought_content, regular_content, function_calls, last_result)
+
+
+async def agoogle_stream_chat(generator: AsyncIterable[Any]):
+    return await amap_google_stream_response(generator)
+
+
+async def agoogle_stream_completion(generator: AsyncIterable[Any]):
+    return await amap_google_stream_response(generator)
+
+
+def map_google_stream_response(results: list):
+    from google.genai.chats import GenerateContentResponse
+
+    response = GenerateContentResponse()
+    if not results:
+        return response
+    results: List[GenerateContentResponse] = results
+
+    thought_content = ""
+    regular_content = ""
+    function_calls = []
+
+    for result in results:
+        if result.candidates and result.candidates[0].content.parts:
+            for part in result.candidates[0].content.parts:
+                if hasattr(part, "text") and part.text:
+                    if hasattr(part, "thought") and part.thought:
+                        thought_content = f"{thought_content}{part.text}"
+                    else:
+                        regular_content = f"{regular_content}{part.text}"
+                elif hasattr(part, "function_call") and part.function_call:
+                    function_calls.append(part.function_call)
+
+    return _build_google_response_from_parts(thought_content, regular_content, function_calls, results[-1])
+
+
+def google_stream_chat(results: list):
+    return map_google_stream_response(results)
+
+
+def google_stream_completion(results: list):
+    return map_google_stream_response(results)
+
+
+def mistral_stream_chat(results: list):
+    from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionMessageToolCall
+    from openai.types.chat.chat_completion import Choice
+    from openai.types.chat.chat_completion_message_tool_call import Function
+
+    last_result = results[-1]
+    response = ChatCompletion(
+        id=last_result.data.id,
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason=last_result.data.choices[0].finish_reason or "stop",
+                index=0,
+                message=ChatCompletionMessage(role="assistant"),
+            )
+        ],
+        created=last_result.data.created,
+        model=last_result.data.model,
+    )
+
+    content = ""
+    tool_calls = None
+
+    for result in results:
+        choices = result.data.choices
+        if len(choices) == 0:
+            continue
+
+        delta = choices[0].delta
+        if delta.content is not None:
+            content = f"{content}{delta.content}"
+
+        if delta.tool_calls:
+            tool_calls = tool_calls or []
+            for tool_call in delta.tool_calls:
+                if len(tool_calls) == 0 or tool_call.id:
+                    tool_calls.append(
+                        ChatCompletionMessageToolCall(
+                            id=tool_call.id or "",
+                            function=Function(
+                                name=tool_call.function.name,
+                                arguments=tool_call.function.arguments,
+                            ),
+                            type="function",
+                        )
+                    )
+                else:
+                    last_tool_call = tool_calls[-1]
+                    if tool_call.function.name:
+                        last_tool_call.function.name = f"{last_tool_call.function.name}{tool_call.function.name}"
+                    if tool_call.function.arguments:
+                        last_tool_call.function.arguments = (
+                            f"{last_tool_call.function.arguments}{tool_call.function.arguments}"
+                        )
+
+    response.choices[0].message.content = content
+    response.choices[0].message.tool_calls = tool_calls
+    response.usage = last_result.data.usage
+    return response
+
+
+async def amistral_stream_chat(generator: AsyncIterable[Any]) -> Any:
+    from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionMessageToolCall
+    from openai.types.chat.chat_completion import Choice
+    from openai.types.chat.chat_completion_message_tool_call import Function
+
+    completion_chunks = []
+    response = ChatCompletion(
+        id="",
+        object="chat.completion",
+        choices=[
+            Choice(
+                finish_reason="stop",
+                index=0,
+                message=ChatCompletionMessage(role="assistant"),
+            )
+        ],
+        created=0,
+        model="",
+    )
+    content = ""
+    tool_calls = None
+
+    async for result in generator:
+        completion_chunks.append(result)
+        choices = result.data.choices
+        if len(choices) == 0:
+            continue
+        delta = choices[0].delta
+        if delta.content is not None:
+            content = f"{content}{delta.content}"
+
+        if delta.tool_calls:
+            tool_calls = tool_calls or []
+            for tool_call in delta.tool_calls:
+                if len(tool_calls) == 0 or tool_call.id:
+                    tool_calls.append(
+                        ChatCompletionMessageToolCall(
+                            id=tool_call.id or "",
+                            function=Function(
+                                name=tool_call.function.name,
+                                arguments=tool_call.function.arguments,
+                            ),
+                            type="function",
+                        )
+                    )
+                else:
+                    last_tool_call = tool_calls[-1]
+                    if tool_call.function.name:
+                        last_tool_call.function.name = f"{last_tool_call.function.name}{tool_call.function.name}"
+                    if tool_call.function.arguments:
+                        last_tool_call.function.arguments = (
+                            f"{last_tool_call.function.arguments}{tool_call.function.arguments}"
+                        )
+
+    if completion_chunks:
+        last_result = completion_chunks[-1]
+        response.id = last_result.data.id
+        response.created = last_result.data.created
+        response.model = last_result.data.model
+        response.usage = last_result.data.usage
+
+    response.choices[0].message.content = content
+    response.choices[0].message.tool_calls = tool_calls
+    return response
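
The async handlers above take the provider's stream object directly instead of a pre-collected list. A minimal sketch of driving aopenai_stream_chat, assuming the standard AsyncOpenAI client; the client call, model name, and the main wrapper are assumptions, only aopenai_stream_chat comes from this release.

# Hypothetical async usage sketch, not part of the diff.
import asyncio

from openai import AsyncOpenAI

from promptlayer.streaming.response_handlers import aopenai_stream_chat


async def main() -> None:
    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",  # assumed model name
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    # aopenai_stream_chat consumes the async iterator of ChatCompletionChunk
    # and returns a single assembled ChatCompletion.
    response = await aopenai_stream_chat(stream)
    print(response.choices[0].message.content)


asyncio.run(main())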
promptlayer/streaming/stream_processor.py
@@ -0,0 +1,100 @@
+"""
+Stream processors for handling streaming responses
+
+This module contains the main streaming logic that processes streaming responses
+from various LLM providers and builds progressive prompt blueprints.
+"""
+
+from typing import Any, AsyncGenerator, AsyncIterable, Callable, Dict, Generator
+
+from .blueprint_builder import (
+    build_prompt_blueprint_from_anthropic_event,
+    build_prompt_blueprint_from_google_event,
+    build_prompt_blueprint_from_openai_chunk,
+)
+
+
+def stream_response(*, generator: Generator, after_stream: Callable, map_results: Callable, metadata: Dict):
+    """
+    Process streaming responses and build progressive prompt blueprints
+
+    Supports OpenAI, Anthropic, and Google (Gemini) streaming formats, building blueprints
+    progressively as the stream progresses.
+    """
+    results = []
+    stream_blueprint = None
+    for result in generator:
+        results.append(result)
+
+        # Handle OpenAI streaming format - process each chunk individually
+        if hasattr(result, "choices"):
+            stream_blueprint = build_prompt_blueprint_from_openai_chunk(result, metadata)
+
+        # Handle Google streaming format (Gemini) - GenerateContentResponse objects
+        elif hasattr(result, "candidates"):
+            stream_blueprint = build_prompt_blueprint_from_google_event(result, metadata)
+
+        # Handle Anthropic streaming format - process each event individually
+        elif hasattr(result, "type"):
+            stream_blueprint = build_prompt_blueprint_from_anthropic_event(result, metadata)
+
+        data = {
+            "request_id": None,
+            "raw_response": result,
+            "prompt_blueprint": stream_blueprint,
+        }
+        yield data
+
+    request_response = map_results(results)
+    response = after_stream(request_response=request_response.model_dump(mode="json"))
+    data["request_id"] = response.get("request_id")
+    data["prompt_blueprint"] = response.get("prompt_blueprint")
+    yield data
+
+
+async def astream_response(
+    generator: AsyncIterable[Any],
+    after_stream: Callable[..., Any],
+    map_results: Callable[[Any], Any],
+    metadata: Dict[str, Any] = None,
+) -> AsyncGenerator[Dict[str, Any], None]:
+    """
+    Async version of stream_response
+
+    Process streaming responses asynchronously and build progressive prompt blueprints
+    Supports OpenAI, Anthropic, and Google (Gemini) streaming formats.
+    """
+    results = []
+    stream_blueprint = None
+
+    async for result in generator:
+        results.append(result)
+
+        # Handle OpenAI streaming format - process each chunk individually
+        if hasattr(result, "choices"):
+            stream_blueprint = build_prompt_blueprint_from_openai_chunk(result, metadata)
+
+        # Handle Google streaming format (Gemini) - GenerateContentResponse objects
+        elif hasattr(result, "candidates"):
+            stream_blueprint = build_prompt_blueprint_from_google_event(result, metadata)
+
+        # Handle Anthropic streaming format - process each event individually
+        elif hasattr(result, "type"):
+            stream_blueprint = build_prompt_blueprint_from_anthropic_event(result, metadata)
+
+        data = {
+            "request_id": None,
+            "raw_response": result,
+            "prompt_blueprint": stream_blueprint,
+        }
+        yield data
+
+    async def async_generator_from_list(lst):
+        for item in lst:
+            yield item
+
+    request_response = await map_results(async_generator_from_list(results))
+    after_stream_response = await after_stream(request_response=request_response.model_dump(mode="json"))
+    data["request_id"] = after_stream_response.get("request_id")
+    data["prompt_blueprint"] = after_stream_response.get("prompt_blueprint")
+    yield data
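
stream_response ties the per-provider handlers together: it yields one dict per chunk with request_id, raw_response, and prompt_blueprint keys, then a final dict after map_results collapses the chunks and after_stream reports the result. A rough wiring sketch under stated assumptions: the log_after_stream stub and the relay caller are hypothetical, while stream_response and openai_stream_chat come from this release.

# Hypothetical wiring sketch, not part of the diff.
from promptlayer.streaming.response_handlers import openai_stream_chat
from promptlayer.streaming.stream_processor import stream_response


def log_after_stream(*, request_response):
    # Stand-in for the PromptLayer tracking callback; it only needs to return
    # a mapping that stream_response can read request_id / prompt_blueprint from.
    return {"request_id": None, "prompt_blueprint": None}


def relay(openai_stream, metadata):
    # openai_stream: an iterator of ChatCompletionChunk (e.g. a stream=True call).
    for data in stream_response(
        generator=openai_stream,
        after_stream=log_after_stream,
        map_results=openai_stream_chat,  # folds the chunks into one ChatCompletion
        metadata=metadata,
    ):
        yield data  # keys: request_id, raw_response, prompt_blueprint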