promptbuilder 0.4.34__tar.gz → 0.4.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {promptbuilder-0.4.34/promptbuilder.egg-info → promptbuilder-0.4.36}/PKG-INFO +1 -1
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/anthropic_client.py +2 -2
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/base_client.py +87 -4
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/bedrock_client.py +2 -2
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/google_client.py +4 -4
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/litellm_client.py +233 -6
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/logfire_decorators.py +18 -22
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/openai_client.py +2 -2
- {promptbuilder-0.4.34 → promptbuilder-0.4.36/promptbuilder.egg-info}/PKG-INFO +1 -1
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/setup.py +1 -1
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/LICENSE +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/MANIFEST.in +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/Readme.md +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/__init__.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/agent/__init__.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/agent/agent.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/agent/context.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/agent/tool.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/agent/utils.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/embeddings.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/__init__.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/aisuite_client.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/config.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/exceptions.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/main.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/types.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/utils.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/vertex_client.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/prompt_builder.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder.egg-info/SOURCES.txt +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder.egg-info/dependency_links.txt +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder.egg-info/requires.txt +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder.egg-info/top_level.txt +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/pyproject.toml +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/setup.cfg +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/tests/test_llm_client.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/tests/test_llm_client_async.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/tests/test_timeout_google.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/tests/test_timeout_litellm.py +0 -0
- {promptbuilder-0.4.34 → promptbuilder-0.4.36}/tests/test_timeout_openai.py +0 -0
{promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/anthropic_client.py RENAMED

@@ -313,7 +313,7 @@ class AnthropicLLMClient(BaseLLMClient):
         raise ValueError(f"Unsupported result type: {result_type}")
 
     @_error_handler
-    def
+    def _create_stream(
         self,
         messages: list[Content],
         *,
@@ -576,7 +576,7 @@ class AnthropicLLMClientAsync(BaseLLMClientAsync):
         raise ValueError(f"Unsupported result_type: {result_type}. Supported types are: None, 'json', or a Pydantic model.")
 
     @_error_handler_async
-    async def
+    async def _create_stream(
         self,
         messages: list[Content],
         *,
{promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/base_client.py RENAMED

@@ -108,7 +108,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
 
         finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
         if autocomplete:
-            while
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
                 BaseLLMClient._append_generated_part(messages, response)
 
                 response = self._create(
@@ -296,7 +296,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
     @logfire_decorators.create_stream
     @utils.retry_cls
     @utils.rpm_limit_cls
-    def
+    def _create_stream(
         self,
         messages: list[Content],
         *,
@@ -306,6 +306,47 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
     ) -> Iterator[Response]:
         raise NotImplementedError
 
+    def create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        autocomplete: bool = False,
+    ) -> Iterator[Response]:
+        if max_tokens is None:
+            max_tokens = self.default_max_tokens
+
+        stream_messages = []
+
+        total_count = 0
+        response = None
+        for response in self._create_stream(
+            messages=messages,
+            thinking_config=thinking_config,
+            system_message=system_message,
+            max_tokens=max_tokens if not autocomplete else None,
+        ):
+            BaseLLMClient._append_generated_part(stream_messages, response)
+            total_count += BaseLLMClient._response_out_tokens(response)
+            yield response
+        finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+        if finish_reason and autocomplete:
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                for response in self._create_stream(
+                    messages=messages,
+                    thinking_config=thinking_config,
+                    system_message=system_message,
+                    max_tokens=max_tokens if not autocomplete else None,
+                ):
+                    BaseLLMClient._append_generated_part(stream_messages, response)
+                    total_count += BaseLLMClient._response_out_tokens(response)
+                    yield response
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+
     @overload
     def from_text(
         self,
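The new public create_stream added above wraps the provider-specific _create_stream and, when autocomplete=True, keeps re-invoking the underlying stream while the last finish reason is MAX_TOKENS, counting generated tokens against the requested budget. Below is a minimal, self-contained sketch of that resume pattern; the FakeBackend, its chunk shape, and the per-piece "token" counting are invented for illustration and are not promptbuilder's API.

from typing import Iterator

class FakeBackend:
    """Stand-in provider: emits at most two chunks per call, then reports
    finish reason "max_tokens" if anything is left, otherwise "stop"."""

    def __init__(self, pieces: list[str]):
        self.pieces = pieces
        self.cursor = 0

    def stream(self) -> Iterator[tuple[str, str | None]]:
        emitted = 0
        while self.cursor < len(self.pieces) and emitted < 2:
            yield (self.pieces[self.cursor], None)
            self.cursor += 1
            emitted += 1
        yield ("", "max_tokens" if self.cursor < len(self.pieces) else "stop")

def create_stream(backend: FakeBackend, max_tokens: int, autocomplete: bool) -> Iterator[str]:
    total = 0
    finish = None
    for text, finish in backend.stream():            # first pass
        if text:
            total += 1
            yield text
    while autocomplete and finish == "max_tokens":   # resume until done or over budget
        for text, finish in backend.stream():
            if text:
                total += 1
                yield text
        if total >= max_tokens:
            break

if __name__ == "__main__":
    backend = FakeBackend(["Hello", ",", " ", "world", "!"])
    print("".join(create_stream(backend, max_tokens=10, autocomplete=True)))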
@@ -459,7 +500,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
 
         finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
         if autocomplete:
-            while
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
                 BaseLLMClient._append_generated_part(messages, response)
 
                 response = await self._create(
@@ -609,7 +650,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
     @logfire_decorators.create_stream_async
     @utils.retry_cls_async
     @utils.rpm_limit_cls_async
-    async def
+    async def _create_stream(
         self,
         messages: list[Content],
         *,
@@ -619,6 +660,48 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
     ) -> AsyncIterator[Response]:
         raise NotImplementedError
 
+    async def create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        autocomplete: bool = False,
+    ) -> AsyncIterator[Response]:
+        if max_tokens is None:
+            max_tokens = self.default_max_tokens
+
+        total_count = 0
+        stream_iter = await self._create_stream(
+            messages=messages,
+            thinking_config=thinking_config,
+            system_message=system_message,
+            max_tokens=max_tokens if not autocomplete else None,
+        )
+        response = None
+        async for response in stream_iter:
+            BaseLLMClient._append_generated_part(messages, response)
+            total_count += BaseLLMClient._response_out_tokens(response)
+            yield response
+
+        finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+        if finish_reason and autocomplete:
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                stream_iter = await self._create_stream(
+                    messages=messages,
+                    thinking_config=thinking_config,
+                    system_message=system_message,
+                    max_tokens=max_tokens if not autocomplete else None,
+                )
+                async for response in stream_iter:
+                    yield response
+                    BaseLLMClient._append_generated_part(messages, response)
+                    total_count += BaseLLMClient._response_out_tokens(response)
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+
     @overload
     async def from_text(
         self,
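The async variant above first awaits _create_stream to obtain an async iterator and then drives it with `async for`. The following is a minimal, runnable sketch of that call shape using invented names (open_stream is a stand-in, not promptbuilder's API).

import asyncio
from typing import AsyncIterator

async def open_stream(text: str) -> AsyncIterator[str]:
    # Invented stand-in: a coroutine that returns an async iterator of chunks,
    # mirroring `stream_iter = await self._create_stream(...)` in the new code.
    async def gen() -> AsyncIterator[str]:
        for word in text.split():
            await asyncio.sleep(0)   # yield control, as a real network stream would
            yield word
    return gen()

async def main() -> None:
    stream_iter = await open_stream("async streaming smoke test")
    parts: list[str] = []
    async for chunk in stream_iter:   # drive the stream, as the async create_stream does
        parts.append(chunk)
    print(" ".join(parts))

asyncio.run(main())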
{promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/bedrock_client.py RENAMED

@@ -266,7 +266,7 @@ class BedrockLLMClient(BaseLLMClient):
         )
 
     @_error_handler
-    def
+    def _create_stream(
         self,
         messages: list[Content],
         *,
@@ -560,7 +560,7 @@ class BedrockLLMClientAsync(BaseLLMClientAsync):
         )
 
     @_error_handler_async
-    async def
+    async def _create_stream(
         self,
         messages: list[Content],
         *,
{promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/google_client.py RENAMED

@@ -142,7 +142,7 @@ class GoogleLLMClient(BaseLLMClient):
             raise ValueError(f"Unsupported result_type: {result_type}. Supported types are: None, 'json', or a Pydantic model.")
 
     @_error_handler
-    def
+    def _create_stream(
         self,
         messages: list[Content],
         *,
@@ -163,7 +163,7 @@ class GoogleLLMClient(BaseLLMClient):
 
         response = self.client.models.generate_content_stream(
             model=self.model,
-            contents=messages,
+            contents=[msg.model_dump() for msg in messages],
             config=config,
         )
         return response
@@ -290,7 +290,7 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
             raise ValueError(f"Unsupported result_type: {result_type}. Supported types are: None, 'json', or a Pydantic model.")
 
     @_error_handler_async
-    async def
+    async def _create_stream(
         self,
         messages: list[Content],
         *,
@@ -311,7 +311,7 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
 
         response = await self.client.aio.models.generate_content_stream(
             model=self.model,
-            contents=messages,
+            contents=[msg.model_dump() for msg in messages],
             config=config,
        )
         return response
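Both Google clients now pass `[msg.model_dump() for msg in messages]` rather than the pydantic Content objects themselves to generate_content_stream. A minimal sketch of what that conversion produces is below; the Content and Part models here are simplified stand-ins for illustration, not promptbuilder's actual type definitions.

from pydantic import BaseModel

class Part(BaseModel):          # simplified stand-in
    text: str | None = None

class Content(BaseModel):       # simplified stand-in
    role: str
    parts: list[Part]

messages = [Content(role="user", parts=[Part(text="Hello")])]

# model_dump() turns each pydantic model (nested models included) into a plain dict,
# which is the shape the streaming call now receives instead of model instances.
payload = [msg.model_dump() for msg in messages]
print(payload)   # [{'role': 'user', 'parts': [{'text': 'Hello'}]}]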
{promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/litellm_client.py RENAMED

@@ -241,7 +241,7 @@ class LiteLLMClient(BaseLLMClient):
             finish_reason_val = first_choice.get("finish_reason")
         else:
             finish_reason_val = getattr(first_choice, "finish_reason", None)
-        mapped_finish_reason =
+        mapped_finish_reason = LiteLLMLLMClient._map_finish_reason(finish_reason_val)
 
         content_parts: list[Part | Any] = list(parts)
         return Response(
@@ -293,7 +293,7 @@ class LiteLLMClient(BaseLLMClient):
             finish_reason_val = first_choice.get("finish_reason")
         else:
             finish_reason_val = getattr(first_choice, "finish_reason", None)
-        mapped_finish_reason =
+        mapped_finish_reason = LiteLLMLLMClient._map_finish_reason(finish_reason_val)
 
         content_parts2: list[Part | Any] = list(parts)
         return Response(
@@ -310,6 +310,125 @@ class LiteLLMClient(BaseLLMClient):
         else:
             raise ValueError(f"Unsupported result_type: {result_type}. Supported types are: None, 'json', or a Pydantic model.")
 
+    def _create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        timeout: float | None = None,
+        tools: list[Tool] | None = None,
+        tool_config: ToolConfig = ToolConfig(),
+    ):
+        """Streaming variant: yields Response objects with partial text/function calls.
+
+        Only supports plain text/function-call streaming (no structured pydantic parsing mid-stream).
+        Final yielded Response contains usage + finish_reason.
+        """
+        litellm_messages: list[dict[str, str]] = []
+        if system_message is not None:
+            litellm_messages.append({"role": "system", "content": system_message})
+        for message in messages:
+            if message.role == "user":
+                litellm_messages.append({"role": "user", "content": message.as_str()})
+            elif message.role == "model":
+                litellm_messages.append({"role": "assistant", "content": message.as_str()})
+
+        litellm_model = f"{self.provider}/{self.model}"
+        kwargs: dict[str, Any] = {
+            "model": litellm_model,
+            "messages": litellm_messages,
+            "stream": True,
+        }
+        if self._api_key:
+            kwargs["api_key"] = self._api_key
+        if self.provider == "ollama":
+            base_url = os.getenv("OLLAMA_BASE_URL") or os.getenv("LITELLM_OLLAMA_BASE_URL")
+            if base_url:
+                kwargs["api_base"] = base_url
+        if max_tokens is not None:
+            kwargs["max_tokens"] = max_tokens
+        if timeout is not None:
+            kwargs["request_timeout"] = timeout
+
+        if tools is not None:
+            lite_tools = []
+            allowed_function_names = None
+            if tool_config.function_calling_config is not None:
+                allowed_function_names = tool_config.function_calling_config.allowed_function_names
+            for tool in tools:
+                for func_decl in tool.function_declarations or []:
+                    if allowed_function_names is None or func_decl.name in allowed_function_names:
+                        parameters = func_decl.parameters
+                        if parameters is not None:
+                            parameters = parameters.model_dump(exclude_none=True)
+                        else:
+                            parameters = {"type": "object", "properties": {}, "required": [], "additionalProperties": False}
+                        lite_tools.append({
+                            "type": "function",
+                            "function": {
+                                "name": func_decl.name,
+                                "description": func_decl.description,
+                                "parameters": parameters,
+                            },
+                        })
+            if lite_tools:
+                kwargs["tools"] = lite_tools
+
+        stream_iter = litellm.completion(**kwargs)
+        # LiteLLM returns a generator of events / chunks.
+        # We'll accumulate text, track usage and finish_reason when present.
+        accumulated_parts: list[Part] = []
+        final_usage = None
+        finish_reason_val = None
+
+        for chunk in stream_iter: # type: ignore
+            # Attempt to extract delta content (OpenAI style)
+            choices = getattr(chunk, "choices", None) or (chunk.get("choices") if isinstance(chunk, dict) else None)
+            if choices:
+                delta_choice = choices[0]
+                # finish_reason may appear early; capture last non-null
+                fr = None
+                if isinstance(delta_choice, dict):
+                    fr = delta_choice.get("finish_reason")
+                    delta_msg = delta_choice.get("delta") or delta_choice.get("message") or {}
+                else:
+                    fr = getattr(delta_choice, "finish_reason", None)
+                    delta_msg = getattr(delta_choice, "delta", None) or getattr(delta_choice, "message", None) or {}
+                if fr is not None:
+                    finish_reason_val = fr
+                # Handle tool calls if present in streaming (rare - ignoring detailed incremental args for now)
+                content_piece = None
+                if isinstance(delta_msg, dict):
+                    content_piece = delta_msg.get("content")
+                else:
+                    content_piece = getattr(delta_msg, "content", None)
+                if content_piece:
+                    accumulated_parts.append(Part(text=content_piece))
+                    yield Response(candidates=[Candidate(content=Content(parts=[Part(text=content_piece)], role="model"))])
+            # Usage may appear at final chunk in some providers (OpenAI style: usage object)
+            # Collect usage if present as attribute or key
+            usage_obj = None
+            if isinstance(chunk, dict):
+                usage_obj = chunk.get("usage")
+            else:
+                usage_obj = getattr(chunk, "usage", None)
+            if usage_obj is not None:
+                final_usage = usage_obj
+
+        # After stream ends, emit final Response with aggregated parts, usage, and finish_reason
+        usage_md = self.make_usage_metadata(final_usage)
+        mapped_finish_reason = LiteLLMClient._map_finish_reason(finish_reason_val)
+        final_parts: list[Part | Any] = list(accumulated_parts)
+        yield Response(
+            candidates=[Candidate(
+                content=Content(parts=final_parts, role="model"),
+                finish_reason=mapped_finish_reason,
+            )],
+            usage_metadata=usage_md,
+        )
+
 
 class LiteLLMClientAsync(BaseLLMClientAsync):
     provider: str = ""
@@ -341,11 +460,11 @@ class LiteLLMClientAsync(BaseLLMClientAsync):
 
     @staticmethod
     def make_function_call(tool_call) -> FunctionCall | None:
-        return
+        return LiteLLMLLMClient.make_function_call(tool_call)
 
     @staticmethod
     def make_usage_metadata(usage) -> UsageMetadata:
-        return
+        return LiteLLMLLMClient.make_usage_metadata(usage)
 
     async def _create(
         self,
@@ -450,7 +569,7 @@ class LiteLLMClientAsync(BaseLLMClientAsync):
             finish_reason_val = first_choice.get("finish_reason")
         else:
             finish_reason_val = getattr(first_choice, "finish_reason", None)
-        mapped_finish_reason =
+        mapped_finish_reason = LiteLLMLLMClient._map_finish_reason(finish_reason_val)
 
         content_parts3: list[Part | Any] = list(parts)
         return Response(
@@ -502,7 +621,7 @@ class LiteLLMClientAsync(BaseLLMClientAsync):
             finish_reason_val = first_choice.get("finish_reason")
         else:
             finish_reason_val = getattr(first_choice, "finish_reason", None)
-        mapped_finish_reason =
+        mapped_finish_reason = LiteLLMLLMClient._map_finish_reason(finish_reason_val)
 
         content_parts4: list[Part | Any] = list(parts)
         return Response(
@@ -518,3 +637,111 @@ class LiteLLMClientAsync(BaseLLMClientAsync):
             )
         else:
             raise ValueError(f"Unsupported result_type: {result_type}. Supported types are: None, 'json', or a Pydantic model.")
+
+    async def _create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        timeout: float | None = None,
+        tools: list[Tool] | None = None,
+        tool_config: ToolConfig = ToolConfig(),
+    ):
+        """Async streaming variant mirroring sync version."""
+        litellm_messages: list[dict[str, str]] = []
+        if system_message is not None:
+            litellm_messages.append({"role": "system", "content": system_message})
+        for message in messages:
+            if message.role == "user":
+                litellm_messages.append({"role": "user", "content": message.as_str()})
+            elif message.role == "model":
+                litellm_messages.append({"role": "assistant", "content": message.as_str()})
+
+        litellm_model = f"{self.provider}/{self.model}"
+        kwargs: dict[str, Any] = {
+            "model": litellm_model,
+            "messages": litellm_messages,
+            "stream": True,
+        }
+        if self._api_key:
+            kwargs["api_key"] = self._api_key
+        if self.provider == "ollama":
+            base_url = os.getenv("OLLAMA_BASE_URL") or os.getenv("LITELLM_OLLAMA_BASE_URL")
+            if base_url:
+                kwargs["api_base"] = base_url
+        if max_tokens is not None:
+            kwargs["max_tokens"] = max_tokens
+        if timeout is not None:
+            kwargs["request_timeout"] = timeout
+
+        if tools is not None:
+            lite_tools = []
+            allowed_function_names = None
+            if tool_config.function_calling_config is not None:
+                allowed_function_names = tool_config.function_calling_config.allowed_function_names
+            for tool in tools:
+                for func_decl in tool.function_declarations or []:
+                    if allowed_function_names is None or func_decl.name in allowed_function_names:
+                        parameters = func_decl.parameters
+                        if parameters is not None:
+                            parameters = parameters.model_dump(exclude_none=True)
+                        else:
+                            parameters = {"type": "object", "properties": {}, "required": [], "additionalProperties": False}
+                        lite_tools.append({
+                            "type": "function",
+                            "function": {
+                                "name": func_decl.name,
+                                "description": func_decl.description,
+                                "parameters": parameters,
+                            },
+                        })
+            if lite_tools:
+                kwargs["tools"] = lite_tools
+
+        stream_iter = await litellm.acompletion(**kwargs)
+
+        accumulated_parts: list[Part] = []
+        final_usage = None
+        finish_reason_val = None
+
+        async for chunk in stream_iter: # type: ignore
+            choices = getattr(chunk, "choices", None) or (chunk.get("choices") if isinstance(chunk, dict) else None)
+            if choices:
+                delta_choice = choices[0]
+                fr = None
+                if isinstance(delta_choice, dict):
+                    fr = delta_choice.get("finish_reason")
+                    delta_msg = delta_choice.get("delta") or delta_choice.get("message") or {}
+                else:
+                    fr = getattr(delta_choice, "finish_reason", None)
+                    delta_msg = getattr(delta_choice, "delta", None) or getattr(delta_choice, "message", None) or {}
+                if fr is not None:
+                    finish_reason_val = fr
+                content_piece = None
+                if isinstance(delta_msg, dict):
+                    content_piece = delta_msg.get("content")
+                else:
+                    content_piece = getattr(delta_msg, "content", None)
+                if content_piece:
+                    accumulated_parts.append(Part(text=content_piece))
+                    yield Response(candidates=[Candidate(content=Content(parts=[Part(text=content_piece)], role="model"))])
+            usage_obj = None
+            if isinstance(chunk, dict):
+                usage_obj = chunk.get("usage")
+            else:
+                usage_obj = getattr(chunk, "usage", None)
+            if usage_obj is not None:
+                final_usage = usage_obj
+
+        usage_md = self.make_usage_metadata(final_usage)
+        mapped_finish_reason = LiteLLMClient._map_finish_reason(finish_reason_val)
+        final_parts_async: list[Part | Any] = list(accumulated_parts)
+        yield Response(
+            candidates=[Candidate(
+                content=Content(parts=final_parts_async, role="model"),
+                finish_reason=mapped_finish_reason,
+            )],
+            usage_metadata=usage_md,
+        )
{promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/logfire_decorators.py RENAMED

@@ -54,6 +54,21 @@ def extract_response_data(response: Response) -> dict[str, Any]:
     return response_data
 
 
+def record(span: logfire.LogfireSpan, duration: float, response: Response):
+    span.set_attribute("duration", duration)
+
+    span.set_attribute("response_data", extract_response_data(response))
+    span.set_attribute("candidates", response.candidates)
+    span.set_attribute("parsed", response.parsed)
+    span.set_attribute("response_text", response.text)
+    if response.usage_metadata is not None:
+        span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
+        span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
+        span.set_attribute("usage_metadata.thoughts_token_count", response.usage_metadata.thoughts_token_count)
+        span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
+        span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+
+
 @inherited_decorator
 def create(class_method: Callable[P, Response]) -> Callable[P, Response]:
     """
@@ -69,17 +84,7 @@ def create(class_method: Callable[P, Response]) -> Callable[P, Response]:
         with logfire_llm.span(f"Create with {span_data["full_model_name"]}", **span_data) as span:
             start_time = time.time()
             response = class_method(self, *args, **kwargs)
-            span
-
-            span.set_attribute("response_data", extract_response_data(response))
-            span.set_attribute("candidates", response.candidates)
-            span.set_attribute("parsed", response.parsed)
-            span.set_attribute("response_text", response.text)
-            if response.usage_metadata is not None:
-                span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
-                span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
-                span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
-                span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+            record(span, time.time() - start_time, response)
 
             return response
 
@@ -101,17 +106,7 @@ def create_async(class_method: Callable[P, Awaitable[Response]]) -> Callable[P,
         with logfire_llm.span(f"Async create with {span_data["full_model_name"]}", **span_data) as span:
             start_time = time.time()
             response = await class_method(self, *args, **kwargs)
-            span
-
-            span.set_attribute("response_data", extract_response_data(response))
-            span.set_attribute("candidates", response.candidates)
-            span.set_attribute("parsed", response.parsed)
-            span.set_attribute("response_text", response.text)
-            if response.usage_metadata is not None:
-                span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
-                span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
-                span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
-                span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+            record(span, time.time() - start_time, response)
 
             return response
 
@@ -150,6 +145,7 @@ def record_streaming(span: logfire.LogfireSpan):
     span.set_attribute("response_text", stream_state.get_response_data()["message"]["content"])
     span.set_attribute("usage_metadata.cached_content_token_count", stream_state.last_usage_data.cached_content_token_count)
     span.set_attribute("usage_metadata.candidates_token_count", stream_state.last_usage_data.candidates_token_count)
+    span.set_attribute("usage_metadata.thoughts_token_count", stream_state.last_usage_data.thoughts_token_count)
     span.set_attribute("usage_metadata.prompt_token_count", stream_state.last_usage_data.prompt_token_count)
     span.set_attribute("usage_metadata.total_token_count", stream_state.last_usage_data.total_token_count)
 
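The logfire decorators now funnel their span attributes through a single record() helper instead of duplicating set_attribute calls in the sync and async paths. A minimal sketch of that pattern is below; span.set_attribute and logfire.span are the same logfire APIs the diff uses, while the attribute names, token counts, and the send_to_logfire=False setup are illustrative assumptions for a local run, not promptbuilder's code.

import time
import logfire

logfire.configure(send_to_logfire=False)  # keep spans local for this sketch

def record(span: logfire.LogfireSpan, duration: float, token_counts: dict[str, int]) -> None:
    # One shared helper sets every attribute, so sync and async callers stay in lockstep.
    span.set_attribute("duration", duration)
    for name, value in token_counts.items():
        span.set_attribute(f"usage_metadata.{name}", value)

with logfire.span("Create with some-model") as span:
    start = time.time()
    time.sleep(0.01)  # stand-in for the actual LLM call
    record(span, time.time() - start, {"prompt_token_count": 12, "candidates_token_count": 34})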
{promptbuilder-0.4.34 → promptbuilder-0.4.36}/promptbuilder/llm_client/openai_client.py RENAMED

@@ -260,7 +260,7 @@ class OpenaiLLMClient(BaseLLMClient):
             raise ValueError(f"Unsupported result type: {result_type}. Supported types are None, 'json', or a Pydantic model class.")
 
     @_error_handler
-    def
+    def _create_stream(
         self,
         messages: list[Content],
         *,
@@ -506,7 +506,7 @@ class OpenaiLLMClientAsync(BaseLLMClientAsync):
             raise ValueError(f"Unsupported result_type: {result_type}. Supported types are: None, 'json', or a Pydantic model.")
 
     @_error_handler_async
-    async def
+    async def _create_stream(
         self,
         messages: list[Content],
         *,