promptbuilder 0.4.30__py3-none-any.whl → 0.4.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promptbuilder/llm_client/aisuite_client.py +2 -0
- promptbuilder/llm_client/anthropic_client.py +8 -0
- promptbuilder/llm_client/base_client.py +64 -40
- promptbuilder/llm_client/bedrock_client.py +9 -1
- promptbuilder/llm_client/google_client.py +12 -5
- promptbuilder/llm_client/litellm_client.py +7 -0
- promptbuilder/llm_client/main.py +13 -7
- promptbuilder/llm_client/openai_client.py +11 -1
- promptbuilder/llm_client/vertex_client.py +394 -0
- {promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/METADATA +4 -1
- {promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/RECORD +14 -13
- {promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/WHEEL +0 -0
- {promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/licenses/LICENSE +0 -0
- {promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/top_level.txt +0 -0
promptbuilder/llm_client/aisuite_client.py
CHANGED

@@ -71,6 +71,7 @@ class AiSuiteLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig = ThinkingConfig(),
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -230,6 +231,7 @@ class AiSuiteLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig = ThinkingConfig(),
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:
promptbuilder/llm_client/anthropic_client.py
CHANGED

@@ -200,6 +200,7 @@ class AnthropicLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -215,6 +216,9 @@ class AnthropicLLMClient(BaseLLMClient):
             "max_tokens": max_tokens,
             "messages": anthropic_messages,
         }
+
+        if timeout is not None:
+            anthropic_kwargs["timeout"] = timeout

         if thinking_config is None:
             thinking_config = self.default_thinking_config

@@ -453,6 +457,7 @@ class AnthropicLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -474,6 +479,9 @@ class AnthropicLLMClientAsync(BaseLLMClientAsync):
             "max_tokens": max_tokens,
             "messages": anthropic_messages,
         }
+
+        if timeout is not None:
+            anthropic_kwargs["timeout"] = timeout

         if thinking_config is None:
             thinking_config = self.default_thinking_config
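Note: the new timeout argument is simply forwarded into the keyword dict passed to the Anthropic SDK, which accepts a per-request timeout override. A minimal sketch of the equivalent direct SDK call (the key and model id below are placeholders, not taken from this package):

    import anthropic

    client = anthropic.Anthropic(api_key="sk-ant-...")   # placeholder key
    message = client.messages.create(
        model="claude-3-5-sonnet-latest",                # placeholder model id
        max_tokens=256,
        messages=[{"role": "user", "content": "ping"}],
        timeout=30.0,                                    # per-request timeout, in seconds
    )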
promptbuilder/llm_client/base_client.py
CHANGED

@@ -82,6 +82,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
         autocomplete: bool = False

@@ -98,6 +99,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=tool_config,
         )

@@ -105,26 +107,28 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         total_count = BaseLLMClient._response_out_tokens(response)

         finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-        … (20 removed lines not rendered in this diff view)
+        if autocomplete:
+            while autocomplete and response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                BaseLLMClient._append_generated_part(messages, response)
+
+                response = self._create(
+                    messages=messages,
+                    result_type=result_type,
+                    thinking_config=thinking_config,
+                    system_message=system_message,
+                    max_tokens=max_tokens,
+                    timeout=timeout,
+                    tools=tools,
+                    tool_config=tool_config,
+                )
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                total_count += BaseLLMClient._response_out_tokens(response)
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+            if response.candidates and response.candidates[0].content:
+                appended_message = BaseLLMClient._append_generated_part(messages, response)
+                if appended_message is not None:
+                    response.candidates[0].content = appended_message
         return response

     @logfire_decorators.create

@@ -139,6 +143,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -153,6 +158,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,

@@ -166,6 +172,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,

@@ -179,6 +186,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,

@@ -192,6 +200,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool],
         tool_choice_mode: Literal["ANY"],
         autocomplete: bool = False,

@@ -205,6 +214,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_choice_mode: Literal["ANY", "NONE"] = "NONE",
         autocomplete: bool = False,

@@ -216,6 +226,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
         )

@@ -232,6 +243,7 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
             autocomplete=autocomplete,

@@ -421,6 +433,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
         autocomplete: bool = False,

@@ -437,6 +450,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=tool_config,
         )

@@ -444,26 +458,28 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         total_count = BaseLLMClient._response_out_tokens(response)

         finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-        … (20 removed lines not rendered in this diff view)
+        if autocomplete:
+            while autocomplete and response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                BaseLLMClient._append_generated_part(messages, response)
+
+                response = await self._create(
+                    messages=messages,
+                    result_type=result_type,
+                    thinking_config=thinking_config,
+                    system_message=system_message,
+                    max_tokens=max_tokens,
+                    timeout=timeout,
+                    tools=tools,
+                    tool_config=tool_config,
+                )
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                total_count += BaseLLMClient._response_out_tokens(response)
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+            if response.candidates and response.candidates[0].content:
+                appended_message = BaseLLMClient._append_generated_part(messages, response)
+                if appended_message is not None:
+                    response.candidates[0].content = appended_message
         return response

     @logfire_decorators.create_async

@@ -478,6 +494,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -492,6 +509,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,

@@ -505,6 +523,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,

@@ -518,6 +537,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: None = None,
         tool_choice_mode: Literal["NONE"] = "NONE",
         autocomplete: bool = False,

@@ -531,6 +551,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool],
         tool_choice_mode: Literal["ANY"],
         autocomplete: bool = False,

@@ -544,6 +565,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_choice_mode: Literal["ANY", "NONE"] = "NONE",
         autocomplete: bool = False,

@@ -555,6 +577,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
         )

@@ -571,6 +594,7 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
             thinking_config=thinking_config,
             system_message=system_message,
             max_tokens=max_tokens,
+            timeout=timeout,
             tools=tools,
             tool_config=ToolConfig(function_calling_config=FunctionCallingConfig(mode=tool_choice_mode)),
             autocomplete=autocomplete
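Taken together, the base-client changes thread a per-request timeout through the create path and its overloads, and the autocomplete path now re-issues _create() while the finish reason is MAX_TOKENS, accumulating output tokens until max_tokens is reached. A hedged usage sketch, assuming the public create() method, the Content/Part constructors from promptbuilder.llm_client.types, and a placeholder model id (none of these specifics are confirmed by this diff beyond the signatures shown):

    from promptbuilder.llm_client.main import get_client
    from promptbuilder.llm_client.types import Content, Part

    client = get_client("google:gemini-2.0-flash", api_key="...")   # placeholder model id and key
    messages = [Content(role="user", parts=[Part(text="Write a long story.")])]

    # timeout is forwarded to the provider SDK; autocomplete keeps requesting
    # continuations while generation stops on MAX_TOKENS, up to max_tokens total.
    response = client.create(
        messages,
        max_tokens=2048,
        timeout=30.0,
        autocomplete=True,
    )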
promptbuilder/llm_client/bedrock_client.py
CHANGED

@@ -111,6 +111,7 @@ class BedrockLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -120,7 +121,10 @@ class BedrockLLMClient(BaseLLMClient):

         if system_message is not None:
             bedrock_kwargs["system"] = [{"text": system_message}]
-
+
+        if timeout is not None:
+            bedrock_kwargs["timeout"] = timeout
+
         if max_tokens is None:
             max_tokens = self.default_max_tokens
         if max_tokens is not None:

@@ -407,6 +411,7 @@ class BedrockLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -417,6 +422,9 @@ class BedrockLLMClientAsync(BaseLLMClientAsync):
         if system_message is not None:
             bedrock_kwargs["system"] = [{"text": system_message}]

+        if timeout is not None:
+            bedrock_kwargs["timeout"] = timeout
+
         if max_tokens is None:
             max_tokens = self.default_max_tokens
         if max_tokens is not None:
promptbuilder/llm_client/google_client.py
CHANGED

@@ -57,7 +57,7 @@ class GoogleLLMClient(BaseLLMClient):
             raise ValueError("To create a google llm client you need to either set the environment variable GOOGLE_API_KEY or pass the api_key in string format")
         super().__init__(GoogleLLMClient.PROVIDER, model, decorator_configs=decorator_configs, default_thinking_config=default_thinking_config, default_max_tokens=default_max_tokens)
         self._api_key = api_key
-        self.client = Client(api_key=api_key)
+        self.client = Client(api_key=api_key, **kwargs)

     @property
     def api_key(self) -> str:

@@ -95,6 +95,7 @@ class GoogleLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -107,6 +108,9 @@ class GoogleLLMClient(BaseLLMClient):
             tools=tools,
             tool_config=tool_config,
         )
+        if timeout is not None:
+            # Google processes timeout via HttpOptions on the request/config
+            config.http_options = types.HttpOptions(timeout=int(timeout * 1_000))

         if thinking_config is None:
             thinking_config = self.default_thinking_config

@@ -233,8 +237,8 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
             raise ValueError("To create a google llm client you need to either set the environment variable GOOGLE_API_KEY or pass the api_key in string format")
         super().__init__(GoogleLLMClientAsync.PROVIDER, model, decorator_configs=decorator_configs, default_thinking_config=default_thinking_config, default_max_tokens=default_max_tokens)
         self._api_key = api_key
-        self.client = Client(api_key=api_key)
-
+        self.client = Client(api_key=api_key, **kwargs)
+
     @property
     def api_key(self) -> str:
         return self._api_key

@@ -248,6 +252,7 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -260,11 +265,13 @@ class GoogleLLMClientAsync(BaseLLMClientAsync):
             tools=tools,
             tool_config=tool_config,
         )
-
+        if timeout is not None:
+            config.http_options = types.HttpOptions(timeout=int(timeout * 1_000))
+
         if thinking_config is None:
             thinking_config = self.default_thinking_config
         config.thinking_config = thinking_config
-
+
         if result_type is None or result_type == "json":
             return await self.client.aio.models.generate_content(
                 model=self.model,
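The Google client converts the seconds-based timeout into milliseconds because google-genai's HttpOptions.timeout is an integer millisecond value attached to the generate-content config. Roughly equivalent direct usage of the SDK (the key and model id are placeholders):

    from google import genai
    from google.genai import types

    client = genai.Client(api_key="...")                      # placeholder key
    config = types.GenerateContentConfig(
        max_output_tokens=256,
        http_options=types.HttpOptions(timeout=30_000),       # milliseconds, i.e. 30 s
    )
    response = client.models.generate_content(
        model="gemini-2.0-flash",                              # placeholder model id
        contents="ping",
        config=config,
    )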
promptbuilder/llm_client/litellm_client.py
CHANGED

@@ -139,6 +139,7 @@ class LiteLLMLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -167,6 +168,9 @@ class LiteLLMLLMClient(BaseLLMClient):

         if max_tokens is not None:
             kwargs["max_tokens"] = max_tokens
+        if timeout is not None:
+            # LiteLLM supports request_timeout in seconds
+            kwargs["request_timeout"] = timeout

         if tools is not None:
             lite_tools = []

@@ -351,6 +355,7 @@ class LiteLLMLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -377,6 +382,8 @@ class LiteLLMLLMClientAsync(BaseLLMClientAsync):

         if max_tokens is not None:
             kwargs["max_tokens"] = max_tokens
+        if timeout is not None:
+            kwargs["request_timeout"] = timeout

         if tools is not None:
             lite_tools = []
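Both LiteLLM paths forward the value as request_timeout in seconds, matching the in-diff comment. For reference, a direct call with the same option might look like the sketch below (the model name is a placeholder, and recent LiteLLM releases also accept a plain timeout argument):

    import litellm

    response = litellm.completion(
        model="openai/gpt-4o-mini",                        # placeholder model name
        messages=[{"role": "user", "content": "ping"}],
        max_tokens=64,
        request_timeout=30.0,                              # seconds, as forwarded by the client
    )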
promptbuilder/llm_client/main.py
CHANGED

@@ -10,7 +10,7 @@ from promptbuilder.llm_client.anthropic_client import AnthropicLLMClient, AnthropicLLMClientAsync
 from promptbuilder.llm_client.openai_client import OpenaiLLMClient, OpenaiLLMClientAsync
 from promptbuilder.llm_client.bedrock_client import BedrockLLMClient, BedrockLLMClientAsync
 from promptbuilder.llm_client.aisuite_client import AiSuiteLLMClient, AiSuiteLLMClientAsync
-from promptbuilder.llm_client.litellm_client import …
+from promptbuilder.llm_client.litellm_client import LiteLLMClient, LiteLLMClientAsync



@@ -24,14 +24,17 @@ def get_client(
     decorator_configs: DecoratorConfigs | None = None,
     default_thinking_config: ThinkingConfig | None = None,
     default_max_tokens: int | None = None,
+    **kwargs,
 ) -> BaseLLMClient:
     global _memory

-    …
+    explicit_kwargs = {
         "decorator_configs": decorator_configs,
         "default_thinking_config": default_thinking_config,
         "default_max_tokens": default_max_tokens,
     }
+    # Merge explicit kwargs with additional kwargs, with explicit taking precedence
+    merged_kwargs = {**kwargs, **explicit_kwargs}
     provider_to_client_class: dict[str, type[BaseLLMClient]] = {
         "google": GoogleLLMClient,
         "anthropic": AnthropicLLMClient,

@@ -41,9 +44,9 @@ def get_client(
     provider, model = full_model_name.split(":", 1)
     if provider in provider_to_client_class:
         client_class = provider_to_client_class[provider]
-        client = client_class(model, api_key, **…
+        client = client_class(model, api_key, **merged_kwargs)
     else:
-        client = …
+        client = LiteLLMClient(full_model_name, api_key, **merged_kwargs)

     if (full_model_name, client.api_key) in _memory:
         client = _memory[(full_model_name, client.api_key)]

@@ -65,14 +68,17 @@ def get_async_client(
     decorator_configs: DecoratorConfigs | None = None,
     default_thinking_config: ThinkingConfig | None = None,
     default_max_tokens: int | None = None,
+    **kwargs,
 ) -> BaseLLMClientAsync:
     global _memory_async

-    …
+    explicit_kwargs = {
         "decorator_configs": decorator_configs,
         "default_thinking_config": default_thinking_config,
         "default_max_tokens": default_max_tokens,
     }
+    # Merge explicit kwargs with additional kwargs, with explicit taking precedence
+    merged_kwargs = {**kwargs, **explicit_kwargs}
     provider_to_client_class: dict[str, type[BaseLLMClientAsync]] = {
         "google": GoogleLLMClientAsync,
         "anthropic": AnthropicLLMClientAsync,

@@ -82,9 +88,9 @@ def get_async_client(
     provider, model = full_model_name.split(":", 1)
     if provider in provider_to_client_class:
         client_class = provider_to_client_class[provider]
-        client = client_class(model, api_key, **…
+        client = client_class(model, api_key, **merged_kwargs)
     else:
-        client = …
+        client = LiteLLMClientAsync(full_model_name, api_key, **merged_kwargs)

     if (full_model_name, client.api_key) in _memory_async:
         client = _memory_async[(full_model_name, client.api_key)]
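get_client and get_async_client now accept arbitrary **kwargs and forward them to the provider client constructor, with the explicitly named arguments winning on key conflicts. A hedged sketch of the pass-through (the http_options kwarg only reaches google-genai's Client because GoogleLLMClient forwards **kwargs to it, as shown above; the model id and key are placeholders):

    from google.genai import types
    from promptbuilder.llm_client.main import get_client

    client = get_client(
        "google:gemini-2.0-flash",                         # "provider:model", placeholder id
        api_key="...",                                     # placeholder key
        default_max_tokens=1024,                           # explicit argument, wins on conflict
        http_options=types.HttpOptions(timeout=60_000),    # pass-through constructor kwarg
    )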
promptbuilder/llm_client/openai_client.py
CHANGED

@@ -150,6 +150,7 @@ class OpenaiLLMClient(BaseLLMClient):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -205,6 +206,9 @@ class OpenaiLLMClient(BaseLLMClient):
             openai_kwargs["tool_choice"] = "required"

         if result_type is None or result_type == "json":
+            # Forward timeout to OpenAI per-request if provided
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = self.client.responses.create(**openai_kwargs)

             parts: list[Part] = []

@@ -227,6 +231,8 @@ class OpenaiLLMClient(BaseLLMClient):
                 ),
             )
         elif isinstance(result_type, type(BaseModel)):
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = self.client.responses.parse(**openai_kwargs, text_format=result_type)

             parts: list[Part] = []

@@ -385,6 +391,7 @@ class OpenaiLLMClientAsync(BaseLLMClientAsync):
         thinking_config: ThinkingConfig | None = None,
         system_message: str | None = None,
         max_tokens: int | None = None,
+        timeout: float | None = None,
         tools: list[Tool] | None = None,
         tool_config: ToolConfig = ToolConfig(),
     ) -> Response:

@@ -447,8 +454,9 @@ class OpenaiLLMClientAsync(BaseLLMClientAsync):
             openai_kwargs["tool_choice"] = "required"

         if result_type is None or result_type == "json":
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = await self.client.responses.create(**openai_kwargs)
-
             parts: list[Part] = []
             for output_item in response.output:
                 if output_item.type == "message":

@@ -469,6 +477,8 @@ class OpenaiLLMClientAsync(BaseLLMClientAsync):
                 ),
             )
         elif isinstance(result_type, type(BaseModel)):
+            if timeout is not None:
+                openai_kwargs["timeout"] = timeout
             response = await self.client.responses.parse(**openai_kwargs, text_format=result_type)

             parts: list[Part] = []
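For the OpenAI client the timeout lands in openai_kwargs for both responses.create and responses.parse; the OpenAI Python SDK accepts a per-request timeout override on these methods. A minimal direct-SDK sketch (the key and model id are placeholders):

    from openai import OpenAI

    client = OpenAI(api_key="sk-...")          # placeholder key
    response = client.responses.create(
        model="gpt-4o-mini",                   # placeholder model id
        input="ping",
        timeout=30.0,                          # per-request override, in seconds
    )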
promptbuilder/llm_client/vertex_client.py
ADDED

@@ -0,0 +1,394 @@
+import os
+import importlib
+from functools import wraps
+from typing import AsyncIterator, Iterator, Callable, ParamSpec, Awaitable, Any, cast
+
+from pydantic import BaseModel, ConfigDict
+from tenacity import RetryError
+
+from vertexai import init as vertex_init
+from vertexai.generative_models import GenerativeModel
+
+from promptbuilder.llm_client.base_client import BaseLLMClient, BaseLLMClientAsync, ResultType
+from promptbuilder.llm_client.types import (
+    Response,
+    Content,
+    Candidate,
+    UsageMetadata,
+    Part,
+    PartLike,
+    ApiKey,
+    ThinkingConfig,
+    Tool,
+    ToolConfig,
+    Model,
+    CustomApiKey,
+)
+from promptbuilder.llm_client.config import DecoratorConfigs
+from promptbuilder.llm_client.utils import inherited_decorator
+from promptbuilder.llm_client.exceptions import APIError
+
+
+P = ParamSpec("P")
+
+
+class VertexApiKey(BaseModel, CustomApiKey):
+    model_config = ConfigDict(frozen=True)
+    project: str
+    location: str
+
+
+@inherited_decorator
+def _error_handler(func: Callable[P, Response]) -> Callable[P, Response]:
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except RetryError as retry_error:
+            e = retry_error.last_attempt._exception
+            if e is None:
+                raise APIError()
+            code = getattr(e, "code", None)
+            response_json = {
+                "status": getattr(e, "status", None),
+                "message": str(e),
+            }
+            response = getattr(e, "response", None)
+            raise APIError(code, response_json, response)
+        except Exception as e:  # noqa: BLE001
+            raise APIError(None, {"status": None, "message": str(e)}, None)
+    return wrapper
+
+
+def _to_vertex_content(messages: list[Content]):
+    gen_mod = importlib.import_module("vertexai.generative_models")
+    VPart = getattr(gen_mod, "Part")
+    VContent = getattr(gen_mod, "Content")
+    v_messages: list[Any] = []
+    for m in messages:
+        v_parts: list[Any] = []
+        if m.parts:
+            for p in m.parts:
+                if p.text is not None:
+                    v_parts.append(VPart.from_text(p.text))
+                elif p.inline_data is not None and p.inline_data.data is not None:
+                    v_parts.append(VPart.from_bytes(data=p.inline_data.data, mime_type=p.inline_data.mime_type or "application/octet-stream"))
+        v_messages.append(VContent(role=m.role, parts=v_parts))
+    return v_messages
+
+
+def _tool_to_vertex(tool: Tool):
+    VTool = getattr(importlib.import_module("vertexai.generative_models"), "Tool")
+    if not tool.function_declarations:
+        return VTool(function_declarations=[])
+    fds = []
+    for fd in tool.function_declarations:
+        fds.append({
+            "name": fd.name,
+            "description": fd.description,
+            "parameters": fd.parameters.model_dump() if fd.parameters is not None else None,
+            "response": fd.response.model_dump() if fd.response is not None else None,
+        })
+    return VTool(function_declarations=fds)
+
+
+def _tool_config_to_vertex(cfg: ToolConfig | None):
+    VToolConfig = getattr(importlib.import_module("vertexai.generative_models"), "ToolConfig")
+    if cfg is None or cfg.function_calling_config is None:
+        return None
+    mode = cfg.function_calling_config.mode or "AUTO"
+    allowed = cfg.function_calling_config.allowed_function_names
+    return VToolConfig(function_calling_config={"mode": mode, "allowedFunctionNames": allowed})
+
+
+def _from_vertex_response(v_resp: Any) -> Response:
+    candidates: list[Candidate] = []
+    if getattr(v_resp, "candidates", None):
+        for c in v_resp.candidates:
+            parts: list[Part] = []
+            if c.content and getattr(c.content, "parts", None):
+                for vp in c.content.parts:
+                    t = getattr(vp, "text", None)
+                    if isinstance(t, str):
+                        parts.append(Part(text=t))
+            candidates.append(Candidate(content=Content(parts=cast(list[Part | PartLike], parts), role="model")))
+
+    usage = None
+    um = getattr(v_resp, "usage_metadata", None)
+    if um is not None:
+        usage = UsageMetadata(
+            cached_content_token_count=getattr(um, "cached_content_token_count", None),
+            candidates_token_count=getattr(um, "candidates_token_count", None),
+            prompt_token_count=getattr(um, "prompt_token_count", None),
+            thoughts_token_count=getattr(um, "thoughts_token_count", None),
+            total_token_count=getattr(um, "total_token_count", None),
+        )
+
+    return Response(candidates=candidates, usage_metadata=usage)
+
+
+class VertexLLMClient(BaseLLMClient):
+    PROVIDER: str = "vertexai"
+
+    def __init__(
+        self,
+        model: str,
+        api_key: ApiKey | None = None,
+        decorator_configs: DecoratorConfigs | None = None,
+        default_thinking_config: ThinkingConfig | None = None,
+        default_max_tokens: int | None = None,
+        project: str | None = None,
+        location: str | None = None,
+        **kwargs,
+    ):
+        # Resolve project/location from args or env
+        project = project or os.getenv("VERTEXAI_PROJECT") or os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCLOUD_PROJECT")
+        location = location or os.getenv("VERTEXAI_LOCATION") or os.getenv("GOOGLE_CLOUD_REGION") or os.getenv("GOOGLE_CLOUD_LOCATION")
+
+        # Allow API Key (string) or ADC (VertexApiKey)
+        api_key_str: str | None = None
+        if isinstance(api_key, str):
+            api_key_str = api_key
+        elif api_key is None:
+            # Fallback to env vars for API key
+            api_key_str = os.getenv("VERTEX_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        elif isinstance(api_key, VertexApiKey):
+            # ADC path with explicit project/location
+            pass
+        else:
+            # Unexpected CustomApiKey subtype
+            raise ValueError("Unsupported api_key type for Vertex: expected str or VertexApiKey")
+
+        if not project or not location:
+            raise ValueError("To create a vertexai llm client you need to provide project and location via args or env vars VERTEXAI_PROJECT and VERTEXAI_LOCATION")
+
+        if not isinstance(api_key, VertexApiKey):
+            api_key = VertexApiKey(project=project, location=location)
+
+        super().__init__(
+            VertexLLMClient.PROVIDER,
+            model,
+            decorator_configs=decorator_configs,
+            default_thinking_config=default_thinking_config,
+            default_max_tokens=default_max_tokens,
+        )
+        self._api_key = api_key
+        self._api_key_str = api_key_str
+
+        vertex_init(project=self._api_key.project, location=self._api_key.location)
+        self._model = GenerativeModel(self.model)
+
+    @property
+    def api_key(self) -> VertexApiKey:
+        return self._api_key
+
+    @_error_handler
+    def _create(
+        self,
+        messages: list[Content],
+        result_type: ResultType = None,
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        timeout: float | None = None,
+        tools: list[Tool] | None = None,
+        tool_config: ToolConfig = ToolConfig(),
+    ) -> Response:
+        v_messages = _to_vertex_content(messages)
+        GenerationConfig = getattr(importlib.import_module("vertexai.generative_models"), "GenerationConfig")
+        gen_cfg = GenerationConfig(max_output_tokens=max_tokens or self.default_max_tokens)
+        req_opts: dict[str, Any] | None = {}
+        if timeout is not None:
+            req_opts["timeout"] = timeout
+        if self._api_key_str:
+            req_opts["api_key"] = self._api_key_str
+        if not req_opts:
+            req_opts = None
+
+        v_tools = None
+        if tools is not None:
+            v_tools = [_tool_to_vertex(t) for t in tools]
+        v_tool_cfg = _tool_config_to_vertex(tool_config)
+
+        v_resp = self._model.generate_content(
+            contents=v_messages,
+            generation_config=gen_cfg,
+            tools=v_tools,
+            tool_config=v_tool_cfg,
+            system_instruction=system_message,
+            request_options=req_opts,
+        )
+
+        resp = _from_vertex_response(v_resp)
+        if result_type == "json" and resp.text is not None:
+            resp.parsed = BaseLLMClient.as_json(resp.text)
+        elif isinstance(result_type, type(BaseModel)) and resp.text is not None:
+            parsed = BaseLLMClient.as_json(resp.text)
+            resp.parsed = result_type.model_validate(parsed)
+        return resp
+
+    def create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+    ) -> Iterator[Response]:
+        v_messages = _to_vertex_content(messages)
+        GenerationConfig = getattr(importlib.import_module("vertexai.generative_models"), "GenerationConfig")
+        gen_cfg = GenerationConfig(max_output_tokens=max_tokens or self.default_max_tokens)
+
+        # Handle thinking config
+        if thinking_config is None:
+            thinking_config = self.default_thinking_config
+        if thinking_config is not None:
+            # Store for potential future use when Vertex AI supports thinking features
+            pass
+
+        req_opts: dict[str, Any] | None = {}
+        if self._api_key_str:
+            req_opts["api_key"] = self._api_key_str
+        if not req_opts:
+            req_opts = None
+        stream = self._model.generate_content(
+            contents=v_messages,
+            generation_config=gen_cfg,
+            system_instruction=system_message,
+            request_options=req_opts,
+            stream=True,
+        )
+        for ev in stream:
+            yield _from_vertex_response(ev)
+
+    @staticmethod
+    def models_list() -> list[Model]:
+        return []
+
+
+@inherited_decorator
+def _error_handler_async(func: Callable[P, Awaitable[Response]]) -> Callable[P, Awaitable[Response]]:
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        try:
+            return await func(*args, **kwargs)
+        except RetryError as retry_error:
+            e = retry_error.last_attempt._exception
+            if e is None:
+                raise APIError()
+            code = getattr(e, "code", None)
+            response_json = {
+                "status": getattr(e, "status", None),
+                "message": str(e),
+            }
+            response = getattr(e, "response", None)
+            raise APIError(code, response_json, response)
+        except Exception as e:  # noqa: BLE001
+            raise APIError(None, {"status": None, "message": str(e)}, None)
+    return wrapper
+
+
+class VertexLLMClientAsync(BaseLLMClientAsync):
+    PROVIDER: str = "vertexai"
+
+    def __init__(
+        self,
+        model: str,
+        api_key: ApiKey | None = None,
+        decorator_configs: DecoratorConfigs | None = None,
+        default_thinking_config: ThinkingConfig | None = None,
+        default_max_tokens: int | None = None,
+        project: str | None = None,
+        location: str | None = None,
+        **kwargs,
+    ):
+        project = project or os.getenv("VERTEXAI_PROJECT") or os.getenv("GOOGLE_CLOUD_PROJECT") or os.getenv("GCLOUD_PROJECT")
+        location = location or os.getenv("VERTEXAI_LOCATION") or os.getenv("GOOGLE_CLOUD_REGION") or os.getenv("GOOGLE_CLOUD_LOCATION")
+
+        api_key_str: str | None = None
+        if isinstance(api_key, str):
+            api_key_str = api_key
+        elif api_key is None:
+            api_key_str = os.getenv("VERTEX_API_KEY") or os.getenv("GOOGLE_API_KEY")
+        elif isinstance(api_key, VertexApiKey):
+            pass
+        else:
+            raise ValueError("Unsupported api_key type for Vertex: expected str or VertexApiKey")
+
+        if not project or not location:
+            raise ValueError("To create a vertexai llm client you need to provide project and location via args or env vars VERTEXAI_PROJECT and VERTEXAI_LOCATION")
+
+        if not isinstance(api_key, VertexApiKey):
+            api_key = VertexApiKey(project=project, location=location)
+
+        super().__init__(
+            VertexLLMClientAsync.PROVIDER,
+            model,
+            decorator_configs=decorator_configs,
+            default_thinking_config=default_thinking_config,
+            default_max_tokens=default_max_tokens,
+        )
+        self._api_key = api_key
+        self._api_key_str = api_key_str
+
+        vertex_init(project=self._api_key.project, location=self._api_key.location)
+        self._model = GenerativeModel(self.model)
+
+    @property
+    def api_key(self) -> VertexApiKey:
+        return self._api_key
+
+    @_error_handler_async
+    async def _create(
+        self,
+        messages: list[Content],
+        result_type: ResultType = None,
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+        timeout: float | None = None,
+        tools: list[Tool] | None = None,
+        tool_config: ToolConfig = ToolConfig(),
+    ) -> Response:
+        # Reuse sync implementation (SDK is sync). For real async, offload to thread.
+        client = VertexLLMClient(
+            model=self.model,
+            api_key=self._api_key,
+            decorator_configs=self._decorator_configs,
+            default_thinking_config=self.default_thinking_config,
+            default_max_tokens=self.default_max_tokens,
+        )
+        return client._create(
+            messages=messages,
+            result_type=result_type,
+            thinking_config=thinking_config,
+            system_message=system_message,
+            max_tokens=max_tokens,
+            timeout=timeout,
+            tools=tools,
+            tool_config=tool_config,
+        )

+    async def create_stream(
+        self,
+        messages: list[Content],
+        *,
+        thinking_config: ThinkingConfig | None = None,
+        system_message: str | None = None,
+        max_tokens: int | None = None,
+    ) -> AsyncIterator[Response]:
+        # Provide a simple wrapper yielding once (non-streaming)
+        resp = await self._create(
+            messages=messages,
+            result_type=None,
+            thinking_config=thinking_config,
+            system_message=system_message,
+            max_tokens=max_tokens,
+        )
+        yield resp
+
+    @staticmethod
+    def models_list() -> list[Model]:
+        return VertexLLMClient.models_list()
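The new Vertex client resolves project and location from arguments or environment variables and wraps vertexai.generative_models.GenerativeModel. A hedged usage sketch based only on the constructor and _create signature above (the model id, project, and location are placeholders, and the public create method is assumed to be inherited from BaseLLMClient):

    from promptbuilder.llm_client.types import Content, Part
    from promptbuilder.llm_client.vertex_client import VertexLLMClient

    client = VertexLLMClient(
        "gemini-2.0-flash",                    # placeholder model id
        project="my-gcp-project",              # placeholder; or set VERTEXAI_PROJECT
        location="us-central1",                # placeholder; or set VERTEXAI_LOCATION
    )
    messages = [Content(role="user", parts=[Part(text="ping")])]
    response = client.create(messages, max_tokens=256, timeout=30.0)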
{promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: promptbuilder
-Version: 0.4.30
+Version: 0.4.31
 Summary: Library for building prompts for LLMs
 Home-page: https://github.com/kapulkin/promptbuilder
 Author: Kapulkin Stanislav

@@ -18,6 +18,9 @@ Requires-Dist: google-genai>=1.4.0
 Requires-Dist: anthropic
 Requires-Dist: openai
 Requires-Dist: aioboto3
+Requires-Dist: litellm
+Requires-Dist: httpx
+Requires-Dist: aiohttp
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
{promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/RECORD
CHANGED

@@ -7,21 +7,22 @@ promptbuilder/agent/context.py,sha256=CVw715vFrhfvddQmRNy4A1U87GsZyIKj9Xu4SCidbc
 promptbuilder/agent/tool.py,sha256=VDbIHK3_Q62Ei7hwLF7nIgHq-PTMKnv1NSjHpDYkUZE,2651
 promptbuilder/agent/utils.py,sha256=vTkphKw04v_QDIJtoB2JKK0RGY6iI1t_0LbmuStunzI,356
 promptbuilder/llm_client/__init__.py,sha256=wJ33cnRtZX_YPsbcGxEu3SEZMOhPX7-fHI59MEPUe7I,517
-promptbuilder/llm_client/aisuite_client.py,sha256=…
-promptbuilder/llm_client/anthropic_client.py,sha256=…
-promptbuilder/llm_client/base_client.py,sha256=…
-promptbuilder/llm_client/bedrock_client.py,sha256=…
+promptbuilder/llm_client/aisuite_client.py,sha256=8inY3UoH8o9yEOvRYP6a_8pjGQK0W_f9eV8MmHzpKTU,15641
+promptbuilder/llm_client/anthropic_client.py,sha256=GL5FRmqu2iQqU44joaviEaRpEp4h_USpUiYc8sWu52Y,28326
+promptbuilder/llm_client/base_client.py,sha256=x9s_pyOiOWlSjTnRo162GWcI4pILoCCwomFoLGrn0RU,29922
+promptbuilder/llm_client/bedrock_client.py,sha256=PGb7KxaK0QwhsZ9frz07h7I2zeyjMMWqIYC7DS6AZp0,28181
 promptbuilder/llm_client/config.py,sha256=exQEm35wp7lK5SfXNpN5H9VZEb2LVa4pyZ-cxGt1U-U,1124
 promptbuilder/llm_client/exceptions.py,sha256=t-X7r_a8B1jNu8eEavde1jXu5dz97yV3IG4YHOtgh0Y,4836
-promptbuilder/llm_client/google_client.py,sha256=…
-promptbuilder/llm_client/litellm_client.py,sha256=…
+promptbuilder/llm_client/google_client.py,sha256=ZjJjDUQZH6zAIRoi4xUx3IDEm8jRkVWGyehy5P_Ba_M,12170
+promptbuilder/llm_client/litellm_client.py,sha256=XoYZmeU8XuROhvzVqbdjaWPktOSVKjehIAZgC1C6Lgo,25585
 promptbuilder/llm_client/logfire_decorators.py,sha256=un_QnIekypOEcqTZ5v1y9pwijGnF95xwnwKO5rFSHVY,9667
-promptbuilder/llm_client/main.py,sha256=…
-promptbuilder/llm_client/openai_client.py,sha256=…
+promptbuilder/llm_client/main.py,sha256=2Q7J5FwivX2YwvptzoSEtCfvfcI9p5HC55D3mMb2se4,8243
+promptbuilder/llm_client/openai_client.py,sha256=QMXX7VPYWFo1VvX8bWF6jpi95ZIOk_MMBpz-14GrT-k,25274
 promptbuilder/llm_client/types.py,sha256=kgbg5FRzvZwu98y1OhAZJDneXBNPnsFZueQCr9HXIY4,8063
 promptbuilder/llm_client/utils.py,sha256=79lvSppjrrItHB5MIozbp_5Oq7TsOK4Qzt9Ae3XMLFw,7624
-promptbuilder…
-promptbuilder-0.4.30.dist-info/…
-promptbuilder-0.4.30.dist-info/…
-promptbuilder-0.4.30.dist-info/…
-promptbuilder-0.4.30.dist-info/…
+promptbuilder/llm_client/vertex_client.py,sha256=aewidTryIpFMlTRFmDqOG7O-NCbvTP5wW6I3-3vQShE,15002
+promptbuilder-0.4.31.dist-info/licenses/LICENSE,sha256=fqXmInzgsvEOIaKSBgcrwKyYCGYF0MKErJ0YivtODcc,1096
+promptbuilder-0.4.31.dist-info/METADATA,sha256=bcAusvEhtctjGM_TGcZb6cpjbmD3BdSj6ajd6gawga0,3799
+promptbuilder-0.4.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+promptbuilder-0.4.31.dist-info/top_level.txt,sha256=UBVcYn4UgrPy3O3fmmnPEU_kieuplBMgheetIMei4EI,14
+promptbuilder-0.4.31.dist-info/RECORD,,

{promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/WHEEL: file without changes
{promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/licenses/LICENSE: file without changes
{promptbuilder-0.4.30.dist-info → promptbuilder-0.4.31.dist-info}/top_level.txt: file without changes