promptbuilder 0.4.35__tar.gz → 0.4.37__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {promptbuilder-0.4.35/promptbuilder.egg-info → promptbuilder-0.4.37}/PKG-INFO +1 -1
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/base_client.py +74 -50
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/logfire_decorators.py +18 -22
- {promptbuilder-0.4.35 → promptbuilder-0.4.37/promptbuilder.egg-info}/PKG-INFO +1 -1
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/setup.py +1 -1
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/LICENSE +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/MANIFEST.in +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/Readme.md +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/__init__.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/agent/__init__.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/agent/agent.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/agent/context.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/agent/tool.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/agent/utils.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/embeddings.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/__init__.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/aisuite_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/anthropic_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/bedrock_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/config.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/exceptions.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/google_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/litellm_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/main.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/openai_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/types.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/utils.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/vertex_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/prompt_builder.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder.egg-info/SOURCES.txt +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder.egg-info/dependency_links.txt +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder.egg-info/requires.txt +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder.egg-info/top_level.txt +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/pyproject.toml +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/setup.cfg +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/tests/test_llm_client.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/tests/test_llm_client_async.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/tests/test_timeout_google.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/tests/test_timeout_litellm.py +0 -0
- {promptbuilder-0.4.35 → promptbuilder-0.4.37}/tests/test_timeout_openai.py +0 -0
{promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/base_client.py
RENAMED

@@ -319,34 +319,47 @@ class BaseLLMClient(ABC, utils.InheritDecoratorsMixin):
         max_tokens = self.default_max_tokens

         stream_messages = []
-
         total_count = 0
-
-
-
-
-
-
-
+        response: Response | None = None
+
+        # Factory to (re)create the underlying provider stream using current accumulated state
+        def _stream_factory():
+            nonlocal response, total_count
+            tries = 3
+            while tries > 0:
+                try:
+                    iter = self._create_stream(
+                        messages=messages + stream_messages,
+                        thinking_config=thinking_config,
+                        system_message=system_message,
+                        max_tokens=max_tokens if not autocomplete else None,
+                    )
+                    for response in iter:
+                        yield response
+                    break
+                except Exception as e:
+                    tries -= 1
+                    if tries == 0:
+                        raise
+                    logger.warning(f"Stream generation error: {e}, retrying...")
+
+        # Use retry to iterate through the stream; on exception previously yielded parts
+        # are already merged into stream_messages so resumed attempts continue generation.
+        for response in _stream_factory():
             BaseLLMClient._append_generated_part(stream_messages, response)
-            finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
             total_count += BaseLLMClient._response_out_tokens(response)
-
-
-
-
-
-
-
-
-
-
-
-
-                total_count += BaseLLMClient._response_out_tokens(response)
-                if max_tokens is not None and total_count >= max_tokens:
-                    break
-
+            yield response
+        finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+        if finish_reason and autocomplete:
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                for response in _stream_factory():
+                    BaseLLMClient._append_generated_part(stream_messages, response)
+                    total_count += BaseLLMClient._response_out_tokens(response)
+                    yield response
+                finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+                if max_tokens is not None and total_count >= max_tokens:
+                    break
+
     @overload
     def from_text(
         self,
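The sync streaming path above now wraps stream creation in a local generator factory that retries on transient errors and resumes from the parts already yielded, and (when autocomplete is on) keeps re-creating the stream while the finish reason is MAX_TOKENS. Below is a minimal, runnable sketch of that retry-and-resume pattern; every name in it (fake_provider_stream, stream_with_retry, WORDS) is an illustrative stand-in, not promptbuilder's API.

import logging

logger = logging.getLogger(__name__)

WORDS = ["streaming", "can", "fail", "and", "resume"]
_fail_once = {"done": False}


def fake_provider_stream(already_generated: list[str]):
    # Pretend provider: continues after the parts already produced, but drops
    # the connection once, partway through the first attempt.
    remaining = WORDS[len(already_generated):]
    for i, word in enumerate(remaining):
        if not _fail_once["done"] and i == 2:
            _fail_once["done"] = True
            raise RuntimeError("connection dropped")
        yield word


def stream_with_retry(max_tries: int = 3):
    generated: list[str] = []  # plays the role of stream_messages

    def _stream_factory():
        tries = max_tries
        while tries > 0:
            try:
                # Re-create the stream from what was already yielded, so a
                # retry resumes generation instead of starting over.
                for word in fake_provider_stream(generated):
                    yield word
                return
            except RuntimeError as exc:
                tries -= 1
                if tries == 0:
                    raise
                logger.warning("Stream generation error: %s, retrying...", exc)

    for word in _stream_factory():
        generated.append(word)  # merged before the factory can retry
        yield word


if __name__ == "__main__":
    print(" ".join(stream_with_retry()))  # -> streaming can fail and resume

Because the consumer appends each chunk before the factory can hit an exception, a retry sees everything already delivered and only generates the remainder, which is the property the real code relies on via stream_messages.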
@@ -673,33 +686,44 @@ class BaseLLMClientAsync(ABC, utils.InheritDecoratorsMixin):
         max_tokens = self.default_max_tokens

         total_count = 0
-
-
-
-
-
-
+        response = None
+
+        async def _stream_factory():
+            nonlocal response, total_count
+            tries = 3
+            while tries > 0:
+                try:
+                    iter = await self._create_stream(
+                        messages=messages,
+                        thinking_config=thinking_config,
+                        system_message=system_message,
+                        max_tokens=max_tokens if not autocomplete else None,
+                    )
+
+                    async for response in iter:
+                        BaseLLMClient._append_generated_part(messages, response)
+                        total_count += BaseLLMClient._response_out_tokens(response)
+                        yield response
+                    break
+                except Exception as e:
+                    tries -= 1
+                    if tries <= 0:
+                        raise
+                    logger.warning(f"Stream generation error: {e}, retrying...")
+
+        stream_iter = _stream_factory()
         async for response in stream_iter:
             yield response
-
-
-
-
-
-
-
-
-
-
-                max_tokens=max_tokens if not autocomplete else None,
-            )
-            async for response in stream_iter:
-                yield response
-                BaseLLMClient._append_generated_part(messages, response)
-                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
-                total_count += BaseLLMClient._response_out_tokens(response)
-                if max_tokens is not None and total_count >= max_tokens:
-                    break
+
+        finish_reason = response.candidates[0].finish_reason.value if response and response.candidates and response.candidates[0].finish_reason else None
+        if finish_reason and autocomplete:
+            while response.candidates and finish_reason == FinishReason.MAX_TOKENS.value:
+                stream_iter = _stream_factory()
+                async for response in stream_iter:
+                    yield response
+                finish_reason = response.candidates[0].finish_reason.value if response.candidates and response.candidates[0].finish_reason else None
+                if max_tokens is not None and total_count >= max_tokens:
+                    break

     @overload
     async def from_text(
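The async client gets the same treatment with an async generator factory. The sketch below shows the shape of that pattern under asyncio; fake_create_stream and the other names are made up for illustration and stand in for the real awaitable _create_stream call.

import asyncio

WORDS = ["async", "streams", "retry", "too"]
_fail_once = {"done": False}


async def fake_create_stream(already_generated: list[str]):
    # Stand-in for an awaitable _create_stream-like call returning an async iterator.
    async def _gen():
        for i, word in enumerate(WORDS[len(already_generated):]):
            await asyncio.sleep(0)  # simulate network latency
            if not _fail_once["done"] and i == 1:
                _fail_once["done"] = True
                raise RuntimeError("connection dropped")
            yield word
    return _gen()


async def stream_with_retry(max_tries: int = 3):
    generated: list[str] = []

    async def _stream_factory():
        tries = max_tries
        while tries > 0:
            try:
                stream = await fake_create_stream(generated)
                async for word in stream:
                    generated.append(word)  # tracked inside the factory, as in the async client
                    yield word
                return
            except RuntimeError:
                tries -= 1
                if tries <= 0:
                    raise

    async for word in _stream_factory():
        yield word


async def main():
    print(" ".join([w async for w in stream_with_retry()]))  # -> async streams retry too


asyncio.run(main())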
{promptbuilder-0.4.35 → promptbuilder-0.4.37}/promptbuilder/llm_client/logfire_decorators.py
RENAMED
@@ -54,6 +54,21 @@ def extract_response_data(response: Response) -> dict[str, Any]:
     return response_data


+def record(span: logfire.LogfireSpan, duration: float, response: Response):
+    span.set_attribute("duration", duration)
+
+    span.set_attribute("response_data", extract_response_data(response))
+    span.set_attribute("candidates", response.candidates)
+    span.set_attribute("parsed", response.parsed)
+    span.set_attribute("response_text", response.text)
+    if response.usage_metadata is not None:
+        span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
+        span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
+        span.set_attribute("usage_metadata.thoughts_token_count", response.usage_metadata.thoughts_token_count)
+        span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
+        span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+
+
 @inherited_decorator
 def create(class_method: Callable[P, Response]) -> Callable[P, Response]:
     """

@@ -69,17 +84,7 @@ def create(class_method: Callable[P, Response]) -> Callable[P, Response]:
         with logfire_llm.span(f"Create with {span_data["full_model_name"]}", **span_data) as span:
             start_time = time.time()
             response = class_method(self, *args, **kwargs)
-            span
-
-            span.set_attribute("response_data", extract_response_data(response))
-            span.set_attribute("candidates", response.candidates)
-            span.set_attribute("parsed", response.parsed)
-            span.set_attribute("response_text", response.text)
-            if response.usage_metadata is not None:
-                span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
-                span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
-                span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
-                span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+            record(span, time.time() - start_time, response)

             return response


@@ -101,17 +106,7 @@ def create_async(class_method: Callable[P, Awaitable[Response]]) -> Callable[P, Awaitable[Response]]:
         with logfire_llm.span(f"Async create with {span_data["full_model_name"]}", **span_data) as span:
             start_time = time.time()
             response = await class_method(self, *args, **kwargs)
-            span
-
-            span.set_attribute("response_data", extract_response_data(response))
-            span.set_attribute("candidates", response.candidates)
-            span.set_attribute("parsed", response.parsed)
-            span.set_attribute("response_text", response.text)
-            if response.usage_metadata is not None:
-                span.set_attribute("usage_metadata.cached_content_token_count", response.usage_metadata.cached_content_token_count)
-                span.set_attribute("usage_metadata.candidates_token_count", response.usage_metadata.candidates_token_count)
-                span.set_attribute("usage_metadata.prompt_token_count", response.usage_metadata.prompt_token_count)
-                span.set_attribute("usage_metadata.total_token_count", response.usage_metadata.total_token_count)
+            record(span, time.time() - start_time, response)

             return response


@@ -150,6 +145,7 @@ def record_streaming(span: logfire.LogfireSpan):
     span.set_attribute("response_text", stream_state.get_response_data()["message"]["content"])
     span.set_attribute("usage_metadata.cached_content_token_count", stream_state.last_usage_data.cached_content_token_count)
     span.set_attribute("usage_metadata.candidates_token_count", stream_state.last_usage_data.candidates_token_count)
+    span.set_attribute("usage_metadata.thoughts_token_count", stream_state.last_usage_data.thoughts_token_count)
     span.set_attribute("usage_metadata.prompt_token_count", stream_state.last_usage_data.prompt_token_count)
     span.set_attribute("usage_metadata.total_token_count", stream_state.last_usage_data.total_token_count)

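The logfire change is a straightforward de-duplication: the block of span.set_attribute calls that was repeated in create and create_async moves into a single record(span, duration, response) helper, which also starts reporting thoughts_token_count. Below is a small stand-in sketch of that refactor; the Span, Usage, and FakeResponse classes are placeholders so the example runs without logfire, and only the attribute names mirror the diff.

import time
from dataclasses import dataclass, field
from typing import Any


@dataclass
class Span:  # stand-in for logfire.LogfireSpan
    attributes: dict[str, Any] = field(default_factory=dict)

    def set_attribute(self, key: str, value: Any) -> None:
        self.attributes[key] = value


@dataclass
class Usage:
    prompt_token_count: int
    candidates_token_count: int
    thoughts_token_count: int
    total_token_count: int


@dataclass
class FakeResponse:
    text: str
    usage_metadata: Usage | None = None


def record(span: Span, duration: float, response: FakeResponse) -> None:
    # One place to attach timing, text, and token usage to a span.
    span.set_attribute("duration", duration)
    span.set_attribute("response_text", response.text)
    if response.usage_metadata is not None:
        usage = response.usage_metadata
        span.set_attribute("usage_metadata.prompt_token_count", usage.prompt_token_count)
        span.set_attribute("usage_metadata.candidates_token_count", usage.candidates_token_count)
        span.set_attribute("usage_metadata.thoughts_token_count", usage.thoughts_token_count)
        span.set_attribute("usage_metadata.total_token_count", usage.total_token_count)


if __name__ == "__main__":
    span = Span()
    start = time.time()
    response = FakeResponse("hi", Usage(12, 5, 0, 17))
    record(span, time.time() - start, response)  # both sync and async paths call one helper
    print(span.attributes)

With the helper in place, both decorators call it right after timing the wrapped method, which is exactly what the added record(span, time.time() - start_time, response) lines in the diff do.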