promptbuilder 0.4.35__py3-none-any.whl → 0.4.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -319,34 +319,47 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         max_tokens = self.default_max_tokens

         stream_messages = []
-
         total_count = 0
-        for response in self._create_stream(
-            messages=messages,
-            thinking_config=thinking_config,
-            system_message=system_message,
-            max_tokens=max_tokens if not autocomplete else None,
-        ):
-            yield response
+        response: Response | None = None
+
+        # Factory to (re)create the underlying provider stream using current accumulated state
+        def _stream_factory():
+            nonlocal response, total_count
+            tries = 3
+            while tries > 0:
+                try:
+                    iter = self._create_stream(
+                        messages=messages + stream_messages,
+                        thinking_config=thinking_config,
+                        system_message=system_message,
+                        max_tokens=max_tokens if not autocomplete else None,
+                    )
+                    for response in iter:
+                        yield response
+                    break
+                except Exception as e:
+                    tries -= 1
+                    if tries == 0:
+                        raise
+                    logger.warning(f"Stream generation error: {e}, retrying...")
+
+        # Use retry to iterate through the stream; on exception previously yielded parts
+        # are already merged into stream_messages so resumed attempts continue generation.
+        for response in _stream_factory():
             BaseLLMClient._append_generated_part(stream_messages, response)
-            finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
             total_count += BaseLLMClient._response_out_tokens(response)
-            if finish_reason:
-                if autocomplete:
-                    while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
-                        for response in self._create_stream(
-                            messages=messages,
-                            thinking_config=thinking_config,
-                            system_message=system_message,
-                            max_tokens=max_tokens if not autocomplete else None,
-                        ):
-                            yield response
-                            BaseLLMClient._append_generated_part(stream_messages, response)
-                            finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-                            total_count += BaseLLMClient._response_out_tokens(response)
-                        if max_tokens is not None and total_count >= max_tokens:
-                            break
-
+            yield response
+        finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+        if finish_reason and autocomplete:
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                for response in _stream_factory():
+                    BaseLLMClient._append_generated_part(stream_messages, response)
+                    total_count += BaseLLMClient._response_out_tokens(response)
+                    yield response
+                finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+
     @overload
     def from_text(
         self,
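
The hunk above and its async counterpart below appear to belong to promptbuilder/llm_client/base_client.py (its hash and size change in the RECORD section at the bottom). The change replaces the single pass over self._create_stream(...) with a local _stream_factory generator that retries up to three times; because every yielded part is merged into stream_messages and sent back as messages + stream_messages, a retried attempt continues the generation where the failed one stopped rather than starting over. Below is a minimal self-contained sketch of that retry-and-resume pattern; fake_provider_stream, FULL_TEXT, and stream_with_retry are hypothetical stand-ins, not promptbuilder APIs.

import logging
from typing import Iterator

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("retry_resume_sketch")

FULL_TEXT = "alpha beta gamma delta epsilon"   # what the fake model "wants" to say
_fail_once = {"armed": True}                    # trip a single simulated failure


def fake_provider_stream(already_generated: str) -> Iterator[str]:
    """Hypothetical provider stream: continues from whatever text was already generated."""
    remaining = FULL_TEXT[len(already_generated):].split()
    for i, word in enumerate(remaining):
        if _fail_once["armed"] and i == 2:
            _fail_once["armed"] = False
            raise ConnectionError("simulated transport drop")
        yield word + " "


def stream_with_retry(tries: int = 3) -> Iterator[str]:
    """Mirror of the diff's _stream_factory: retry on error, resuming from accumulated output."""
    generated = ""                               # plays the role of stream_messages
    while tries > 0:
        try:
            for chunk in fake_provider_stream(generated):
                generated += chunk               # fold the new part back into the context
                yield chunk
            break                                # stream finished cleanly
        except Exception as exc:
            tries -= 1
            if tries == 0:
                raise
            logger.warning("Stream generation error: %s, retrying...", exc)


if __name__ == "__main__":
    print("".join(stream_with_retry()).strip())  # -> "alpha beta gamma delta epsilon"

As in the diff, the retry budget is three attempts per factory call, the last failure is re-raised, and chunks delivered before a failure are not re-yielded to the consumer.
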
@@ -673,33 +686,44 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         max_tokens = self.default_max_tokens

         total_count = 0
-        stream_iter = await self._create_stream(
-            messages=messages,
-            thinking_config=thinking_config,
-            system_message=system_message,
-            max_tokens=max_tokens if not autocomplete else None,
-        )
+        response = None
+
+        async def _stream_factory():
+            nonlocal response, total_count
+            tries = 3
+            while tries > 0:
+                try:
+                    iter = await self._create_stream(
+                        messages=messages,
+                        thinking_config=thinking_config,
+                        system_message=system_message,
+                        max_tokens=max_tokens if not autocomplete else None,
+                    )
+
+                    async for response in iter:
+                        BaseLLMClient._append_generated_part(messages, response)
+                        total_count += BaseLLMClient._response_out_tokens(response)
+                        yield response
+                    break
+                except Exception as e:
+                    tries -= 1
+                    if tries <= 0:
+                        raise
+                    logger.warning(f"Stream generation error: {e}, retrying...")
+
+        stream_iter = _stream_factory()
         async for response in stream_iter:
             yield response
-            BaseLLMClient._append_generated_part(messages, response)
-            finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-            total_count += BaseLLMClient._response_out_tokens(response)
-            if finish_reason:
-                if autocomplete:
-                    while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
-                        stream_iter = await self._create_stream(
-                            messages=messages,
-                            thinking_config=thinking_config,
-                            system_message=system_message,
-                            max_tokens=max_tokens if not autocomplete else None,
-                        )
-                        async for response in stream_iter:
-                            yield response
-                            BaseLLMClient._append_generated_part(messages, response)
-                            finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-                            total_count += BaseLLMClient._response_out_tokens(response)
-                        if max_tokens is not None and total_count >= max_tokens:
-                            break
+
+        finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+        if finish_reason and autocomplete:
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                stream_iter = _stream_factory()
+                async for response in stream_iter:
+                    yield response
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                if max_tokens is not None and total_count >= max_tokens:
+                    break

     @overload
     async def from_text(
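
Both generators also keep the autocomplete continuation: after a pass finishes, the finish reason of the last response is inspected, and while it equals FinishReason.MAX_TOKENS (and autocomplete is set) the stream is re-created so the model keeps writing, until generation stops for another reason or total_count reaches max_tokens. A compact sketch of that loop follows; Chunk, fake_pass, and the word-count token estimate are illustrative stand-ins rather than the library's types.

from dataclasses import dataclass
from enum import Enum
from typing import Iterator


class FinishReason(Enum):
    STOP = "STOP"
    MAX_TOKENS = "MAX_TOKENS"


@dataclass
class Chunk:
    text: str
    finish_reason: FinishReason | None = None   # set on the last chunk of a pass


def fake_pass(pass_no: int) -> Iterator[Chunk]:
    """Hypothetical single generation pass: the first pass stops on MAX_TOKENS."""
    reason = FinishReason.MAX_TOKENS if pass_no == 0 else FinishReason.STOP
    yield Chunk(f"part-{pass_no}a ")
    yield Chunk(f"part-{pass_no}b ", finish_reason=reason)


def autocomplete_stream(max_tokens: int | None = None) -> Iterator[Chunk]:
    """Keep re-creating the stream while it stopped only because of the token limit."""
    total_count = 0
    passes = 0
    chunk = None
    for chunk in fake_pass(passes):
        total_count += len(chunk.text.split())   # stand-in for _response_out_tokens
        yield chunk
    finish_reason = chunk.finish_reason if chunk else None
    while finish_reason == FinishReason.MAX_TOKENS:
        passes += 1
        for chunk in fake_pass(passes):
            total_count += len(chunk.text.split())
            yield chunk
        finish_reason = chunk.finish_reason
        if max_tokens is not None and total_count >= max_tokens:
            break


if __name__ == "__main__":
    print("".join(c.text for c in autocomplete_stream()))
    # -> "part-0a part-0b part-1a part-1b "
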
@@ -54,6 +54,21 @@ def extract_response_data(response: Response) -> dict[str, Any]:
     return response_data


+def record(span: logfire.LogfireSpan, duration: float, response: Response):
+    span.set_attribute("duration", duration)
+
+    span.set_attribute("response_data", extract_response_data(response))
+    span.set_attribute("candidates", response.candidates)
+    span.set_attribute("parsed", response.parsed)
+    span.set_attribute("response_text", response.text)
+    if response.usage_metadata is not None:
+        span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
+        span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
+        span.set_attribute("usage_metadata.thoughts_token_count", response.usage_metadata.thoughts_token_count)
+        span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
+        span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+
+
 @inherited_decorator
 def create(class_method: Callable[P, Response]) -> Callable[P, Response]:
     """
@@ -69,17 +84,7 @@ def create(class_method: Callable[P, Response]) -> Callable[P, Response]:
         with logfire_llm.span(f"Create with {span_data["full_model_name"]}", **span_data) as span:
             start_time = time.time()
             response = class_method(self, *args, **kwargs)
-            span.set_attribute("duration", time.time() - start_time)
-
-            span.set_attribute("response_data", extract_response_data(response))
-            span.set_attribute("candidates", response.candidates)
-            span.set_attribute("parsed", response.parsed)
-            span.set_attribute("response_text", response.text)
-            if response.usage_metadata is not None:
-                span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
-                span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
-                span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
-                span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+            record(span, time.time() - start_time, response)

         return response

@@ -101,17 +106,7 @@ def create_async(class_method: Callable[P, Awaitable[Response]]) -> Callable[P,
         with logfire_llm.span(f"Async create with {span_data["full_model_name"]}", **span_data) as span:
             start_time = time.time()
             response = await class_method(self, *args, **kwargs)
-            span.set_attribute("duration", time.time() - start_time)
-
-            span.set_attribute("response_data", extract_response_data(response))
-            span.set_attribute("candidates", response.candidates)
-            span.set_attribute("parsed", response.parsed)
-            span.set_attribute("response_text", response.text)
-            if response.usage_metadata is not None:
-                span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
-                span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
-                span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
-                span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+            record(span, time.time() - start_time, response)

         return response

@@ -150,6 +145,7 @@ def record_streaming(span: logfire.LogfireSpan):
     span.set_attribute("response_text", stream_state.get_response_data()["message"]["content"])
     span.set_attribute("usage_metadata.cached_content_token_count", stream_state.last_usage_data.cached_content_token_count)
     span.set_attribute("usage_metadata.candidates_token_count", stream_state.last_usage_data.candidates_token_count)
+    span.set_attribute("usage_metadata.thoughts_token_count", stream_state.last_usage_data.thoughts_token_count)
     span.set_attribute("usage_metadata.prompt_token_count", stream_state.last_usage_data.prompt_token_count)
     span.set_attribute("usage_metadata.total_token_count", stream_state.last_usage_data.total_token_count)

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: promptbuilder
-Version: 0.4.35
+Version: 0.4.37
 Summary: Library for building prompts for LLMs
 Home-page: https://github.com/kapulkin/promptbuilder
 Author: Kapulkin Stanislav
@@ -9,20 +9,20 @@ promptbuilder/agent/utils.py,sha256=vTkphKw04v_QDIJtoB2JKK0RGY6iI1t_0LbmuStunzI,
 promptbuilder/llm_client/__init__.py,sha256=wJ33cnRtZX_YPsbcGxEu3SEZMOhPX7-fHI59MEPUe7I,517
 promptbuilder/llm_client/aisuite_client.py,sha256=8inY3UoH8o9yEOvRYP6a_8pjGQK0W_f9eV8MmHzpKTU,15641
 promptbuilder/llm_client/anthropic_client.py,sha256=YR1Pc4Fj0WpG7qcQnPLkQMzDsvA7SMvWomFR1oCzMsk,28328
-promptbuilder/llm_client/base_client.py,sha256=tpT_XlSZOW0PAB1OkIJQsDc5OUz0MdjB6GAfLjNW3Jk,34147
+promptbuilder/llm_client/base_client.py,sha256=RZ7AF5b0Z8Y8WACcyhSRBe4OJFjkhsDaCkiiUYkvYIM,34852
 promptbuilder/llm_client/bedrock_client.py,sha256=IQt7Sv_Wt6mg5-bhuyr-Nwjx5Nxk2S8rKEVkfwvWqE0,28183
 promptbuilder/llm_client/config.py,sha256=exQEm35wp7lK5SfXNpN5H9VZEb2LVa4pyZ-cxGt1U-U,1124
 promptbuilder/llm_client/exceptions.py,sha256=t-X7r_a8B1jNu8eEavde1jXu5dz97yV3IG4YHOtgh0Y,4836
 promptbuilder/llm_client/google_client.py,sha256=xXMWRf0TRuQCPmYfo2fnPRuoUnSiPNUOGU-GfHnd6uo,12234
 promptbuilder/llm_client/litellm_client.py,sha256=faQZ5k0tbQa0Z6t43OE05Ky1eRXquy9cVgMGpZCc2Uk,36416
-promptbuilder/llm_client/logfire_decorators.py,sha256=pEPm3nTfqkg9kZr3s5ORs3fMex_z4Ce8CAt301m3OIk,9697
+promptbuilder/llm_client/logfire_decorators.py,sha256=IoZf3PY1TFZlazY-GuAICF2GAJ8qtMpkTvBU3RPw9TQ,9126
 promptbuilder/llm_client/main.py,sha256=2Q7J5FwivX2YwvptzoSEtCfvfcI9p5HC55D3mMb2se4,8243
 promptbuilder/llm_client/openai_client.py,sha256=ZgI22-j4_B6OmvV10DO1lk4yN63hNYAaHx_K6E3pTSA,25276
 promptbuilder/llm_client/types.py,sha256=fnkSMFjK9ViaRQsD6LILpLz8R2_E1TI9efjy8VNO0RQ,8139
 promptbuilder/llm_client/utils.py,sha256=79lvSppjrrItHB5MIozbp_5Oq7TsOK4Qzt9Ae3XMLFw,7624
 promptbuilder/llm_client/vertex_client.py,sha256=OgbmRIYQXeK2kAh27ijhXuTzeg8APP38IyJ9WJqvDkY,15405
-promptbuilder-0.4.35.dist-info/licenses/LICENSE,sha256=fqXmInzgsvEOIaKSBgcrwKyYCGYF0MKErJ0YivtODcc,1096
-promptbuilder-0.4.35.dist-info/METADATA,sha256=I7gvoYDCgoHnFUB180N5LjBZlWPFg5u-rEn48oCfJv8,3799
-promptbuilder-0.4.35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-promptbuilder-0.4.35.dist-info/top_level.txt,sha256=UBVcYn4UgrPy3O3fmmnPEU_kieuplBMgheetIMei4EI,14
-promptbuilder-0.4.35.dist-info/RECORD,,
+promptbuilder-0.4.37.dist-info/licenses/LICENSE,sha256=fqXmInzgsvEOIaKSBgcrwKyYCGYF0MKErJ0YivtODcc,1096
+promptbuilder-0.4.37.dist-info/METADATA,sha256=Qjz3KpGxhDKBbjJQpkNXLQCXNMEmAn4orrnraB6qKt4,3799
+promptbuilder-0.4.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+promptbuilder-0.4.37.dist-info/top_level.txt,sha256=UBVcYn4UgrPy3O3fmmnPEU_kieuplBMgheetIMei4EI,14
+promptbuilder-0.4.37.dist-info/RECORD,,