payi 0.1.0a82-py3-none-any.whl → 0.1.0a83-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of payi might be problematic.

payi/_version.py CHANGED
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "payi"
-__version__ = "0.1.0-alpha.82" # x-release-please-version
+__version__ = "0.1.0-alpha.83" # x-release-please-version
payi/lib/AnthropicInstrumentor.py CHANGED
@@ -7,7 +7,7 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class AnthropicInstrumentor:
@@ -133,7 +133,8 @@ class _AnthropicProviderRequest(_ProviderRequest):
         )
 
     @override
-    def process_chunk(self, chunk: Any) -> bool:
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         if chunk.type == "message_start":
             self._ingest["provider_response_id"] = chunk.message.id
 
@@ -154,9 +155,15 @@ class _AnthropicProviderRequest(_ProviderRequest):
 
         elif chunk.type == "message_delta":
             usage = chunk.usage
+            ingest = True
+
+            # Web search will return an updated input tokens value at the end of streaming
+            if usage.input_tokens > 0:
+                self._ingest["units"]["text"]["input"] = usage.input_tokens
+
             self._ingest["units"]["text"]["output"] = usage.output_tokens
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Any:
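
Note: the practical effect of the new return type is that the Anthropic request now defers ingestion until the terminal message_delta event, whose usage block is authoritative (per the comment above, web search can revise the input token count at the end of the stream). A minimal sketch of the contract — the classify helper and chunk types here are illustrative, not part of the diff:

    from dataclasses import dataclass

    @dataclass
    class _ChunkResult:
        send_chunk_to_caller: bool
        ingest: bool = False

    def classify(chunk_type: str) -> _ChunkResult:
        # Forward every chunk; request ingestion only on the final usage event.
        return _ChunkResult(send_chunk_to_caller=True, ingest=chunk_type == "message_delta")

    assert classify("message_start").ingest is False
    assert classify("message_delta").ingest is True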
payi/lib/BedrockInstrumentor.py CHANGED
@@ -10,7 +10,7 @@ from payi.lib.helpers import PayiCategories, PayiHeaderNames, payi_aws_bedrock_u
 from payi.types.ingest_units_params import Units, IngestUnitsParams
 from payi.types.pay_i_common_models_api_router_header_info_param import PayICommonModelsAPIRouterHeaderInfoParam
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 _supported_model_prefixes = ["meta.llama3", "anthropic.", "amazon.nova-pro", "amazon.nova-lite", "amazon.nova-micro"]
 
@@ -22,8 +22,6 @@ class BedrockInstrumentor:
         BedrockInstrumentor._instrumentor = instrumentor
 
         try:
-            import boto3 # type: ignore # noqa: F401 I001
-
             wrap_function_wrapper(
                 "botocore.client",
                 "ClientCreator.create_client",
@@ -43,7 +41,7 @@ class BedrockInstrumentor:
 @_PayiInstrumentor.payi_wrapper
 def create_client_wrapper(instrumentor: _PayiInstrumentor, wrapped: Any, instance: Any, *args: Any, **kwargs: Any) -> Any: # noqa: ARG001
     if kwargs.get("service_name") != "bedrock-runtime":
-        instrumentor._logger.debug(f"skipping client wrapper creation for {kwargs.get('service_name', '')} service")
+        # instrumentor._logger.debug(f"skipping client wrapper creation for {kwargs.get('service_name', '')} service")
         return wrapped(*args, **kwargs)
 
     try:
@@ -272,13 +270,14 @@ class _BedrockInvokeStreamingProviderRequest(_BedrockProviderRequest):
         self._is_anthropic: bool = model_id.startswith("anthropic.")
 
     @override
-    def process_chunk(self, chunk: Any) -> bool:
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
        if self._is_anthropic:
            return self.process_invoke_streaming_anthropic_chunk(chunk)
        else:
            return self.process_invoke_streaming_llama_chunk(chunk)
 
-    def process_invoke_streaming_anthropic_chunk(self, chunk: str) -> bool:
+    def process_invoke_streaming_anthropic_chunk(self, chunk: str) -> _ChunkResult:
+        ingest = False
         chunk_dict = json.loads(chunk)
         type = chunk_dict.get("type", "")
 
@@ -301,18 +300,21 @@ class _BedrockInvokeStreamingProviderRequest(_BedrockProviderRequest):
         elif type == "message_delta":
             usage = chunk_dict['usage']
             self._ingest["units"]["text"]["output"] = usage['output_tokens']
+            ingest = True
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
-    def process_invoke_streaming_llama_chunk(self, chunk: str) -> bool:
+    def process_invoke_streaming_llama_chunk(self, chunk: str) -> _ChunkResult:
+        ingest = False
         chunk_dict = json.loads(chunk)
         metrics = chunk_dict.get("amazon-bedrock-invocationMetrics", {})
         if metrics:
             input = metrics.get("inputTokenCount", 0)
             output = metrics.get("outputTokenCount", 0)
             self._ingest["units"]["text"] = Units(input=input, output=output)
+            ingest = True
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
 class _BedrockInvokeSynchronousProviderRequest(_BedrockProviderRequest):
     @override
@@ -374,7 +376,8 @@ class _BedrockConverseSynchronousProviderRequest(_BedrockProviderRequest):
 
 class _BedrockConverseStreamingProviderRequest(_BedrockProviderRequest):
     @override
-    def process_chunk(self, chunk: 'dict[str, Any]') -> bool:
+    def process_chunk(self, chunk: 'dict[str, Any]') -> _ChunkResult:
+        ingest = False
         metadata = chunk.get("metadata", {})
 
         if metadata:
@@ -383,4 +386,6 @@ class _BedrockConverseStreamingProviderRequest(_BedrockProviderRequest):
             output = usage["outputTokens"]
             self._ingest["units"]["text"] = Units(input=input, output=output)
 
-        return True
+            ingest = True
+
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
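
Note: all three Bedrock streaming paths now flip ingest to True only on the event that actually carries token counts. For the Llama invoke path that is the event with the amazon-bedrock-invocationMetrics block, which appears once at the end of the stream. A sketch of that parsing with a made-up payload (the token values are illustrative):

    import json

    last_event = json.dumps({
        "amazon-bedrock-invocationMetrics": {"inputTokenCount": 12, "outputTokenCount": 87}
    })

    chunk_dict = json.loads(last_event)
    metrics = chunk_dict.get("amazon-bedrock-invocationMetrics", {})
    ingest = bool(metrics)  # mirrors the ingest flag set in the diff above
    units = {"input": metrics.get("inputTokenCount", 0), "output": metrics.get("outputTokenCount", 0)}
    assert ingest and units == {"input": 12, "output": 87}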
payi/lib/GoogleGenAiInstrumentor.py CHANGED
@@ -8,7 +8,7 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class GoogleGenAiInstrumentor:
@@ -248,7 +248,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
             prompt["tool_config"] = tool_config
 
     @override
-    def process_chunk(self, chunk: Any) -> bool:
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         response_dict: dict[str, Any] = chunk.to_json_dict()
         if "provider_response_id" not in self._ingest:
             id = response_dict.get("response_id", None)
@@ -267,8 +268,9 @@ class _GoogleGenAiRequest(_ProviderRequest):
         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
             self._compute_usage(response_dict, streaming_candidates_characters=self._candiates_character_count)
+            ingest = True
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @staticmethod
     def _is_character_billing_model(model: str) -> bool:
payi/lib/OpenAIInstrumentor.py CHANGED
@@ -9,7 +9,7 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories, PayiHeaderNames
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class OpenAiInstrumentor:
@@ -22,8 +22,6 @@ class OpenAiInstrumentor:
     @staticmethod
     def instrument(instrumentor: _PayiInstrumentor) -> None:
         try:
-            from openai import OpenAI # type: ignore # noqa: F401 I001
-
             wrap_function_wrapper(
                 "openai.resources.chat.completions",
                 "Completions.create",
@@ -47,7 +45,11 @@ class OpenAiInstrumentor:
                 "AsyncEmbeddings.create",
                 aembeddings_wrapper(instrumentor),
             )
+        except Exception as e:
+            instrumentor._logger.debug(f"Error instrumenting openai: {e}")
 
+        # responses separately as they are relatively new and the client may not be using the latest openai module
+        try:
             wrap_function_wrapper(
                 "openai.resources.responses",
                 "Responses.create",
@@ -62,8 +64,6 @@ class OpenAiInstrumentor:
 
         except Exception as e:
             instrumentor._logger.debug(f"Error instrumenting openai: {e}")
-            return
-
 
 @_PayiInstrumentor.payi_wrapper
 def embeddings_wrapper(
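
Note: instrument() previously wrapped everything in a single try block, so an older openai install without the Responses module could fail part-way and (via the now-removed return) leave the rest untouched; wrapping is now split into two independent try blocks. A minimal sketch of the pattern, with a placeholder wrapper in place of the real usage-recording ones:

    from wrapt import wrap_function_wrapper  # type: ignore

    def passthrough(wrapped, instance, args, kwargs):
        # Placeholder wrapper; the real code records usage around the call.
        return wrapped(*args, **kwargs)

    try:
        wrap_function_wrapper("openai.resources.chat.completions", "Completions.create", passthrough)
    except Exception:
        pass  # chat wrapping failed; unrelated to the Responses API below

    # Wrapped separately: if this module doesn't exist (older openai),
    # the chat instrumentation above still stands.
    try:
        wrap_function_wrapper("openai.resources.responses", "Responses.create", passthrough)
    except Exception:
        pass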
@@ -338,7 +338,8 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
         self._include_usage_added = False
 
     @override
-    def process_chunk(self, chunk: Any) -> bool:
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         model = model_to_dict(chunk)
 
         if "provider_response_id" not in self._ingest:
@@ -356,8 +357,9 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
             # packet which contains the usage to the client as they are not expecting the data
             if self._include_usage_added:
                 send_chunk_to_client = False
+            ingest = True
 
-        return send_chunk_to_client
+        return _ChunkResult(send_chunk_to_caller=send_chunk_to_client, ingest=ingest)
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -420,7 +422,8 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
             input_tokens_details_key=_OpenAiProviderRequest.responses_input_tokens_details_key)
 
     @override
-    def process_chunk(self, chunk: Any) -> bool:
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         model = model_to_dict(chunk)
         response: dict[str, Any] = model.get("response", {})
 
@@ -432,8 +435,9 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
         usage = response.get("usage")
         if usage:
             self.add_usage_units(usage)
+            ingest = True
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
payi/lib/VertexInstrumentor.py CHANGED
@@ -8,33 +8,37 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class VertexInstrumentor:
     @staticmethod
     def instrument(instrumentor: _PayiInstrumentor) -> None:
         try:
-            import vertexai # type: ignore # noqa: F401 I001
-
             wrap_function_wrapper(
                 "vertexai.generative_models",
                 "GenerativeModel.generate_content",
                 generate_wrapper(instrumentor),
             )
 
-            wrap_function_wrapper(
-                "vertexai.preview.generative_models",
-                "GenerativeModel.generate_content",
-                generate_wrapper(instrumentor),
-            )
-
             wrap_function_wrapper(
                 "vertexai.generative_models",
                 "GenerativeModel.generate_content_async",
                 agenerate_wrapper(instrumentor),
             )
 
+        except Exception as e:
+            instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
+            return
+
+        # separate instrumetning preview functionality from released in case it fails
+        try:
+            wrap_function_wrapper(
+                "vertexai.preview.generative_models",
+                "GenerativeModel.generate_content",
+                generate_wrapper(instrumentor),
+            )
+
             wrap_function_wrapper(
                 "vertexai.preview.generative_models",
                 "GenerativeModel.generate_content_async",
@@ -93,11 +97,19 @@ class _GoogleVertexRequest(_ProviderRequest):
         )
         self._prompt_character_count = 0
         self._candiates_character_count = 0
+        self._model_name: Optional[str] = None
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
         from vertexai.generative_models import Content, Image, Part # type: ignore # noqa: F401 I001
 
+        # Try to extra the model name as a backup if the response does not provide it (older vertexai versions do not)
+        if instance and hasattr(instance, "_model_name"):
+            model = instance._model_name
+            if model and isinstance(model, str):
+                # Extract the model name after the last slash
+                self._model_name = model.split('/')[-1]
+
         if not args:
             return True
@@ -191,17 +203,26 @@ class _GoogleVertexRequest(_ProviderRequest):
             # tool_config does not have to_dict or any other serializable object
             prompt["tool_config"] = str(tool_config) # type: ignore
 
+    def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
+        model: Optional[str] = response.get("model_version", None)
+        if model:
+            return model
+
+        return self._model_name
+
     @override
-    def process_chunk(self, chunk: Any) -> bool:
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         response_dict: dict[str, Any] = chunk.to_dict()
         if "provider_response_id" not in self._ingest:
             id = response_dict.get("response_id", None)
             if id:
                 self._ingest["provider_response_id"] = id
 
-        model: str = response_dict.get("model_version", "")
-
-        self._ingest["resource"] = "google." + model
+        if "resource" not in self._ingest:
+            model: Optional[str] = self._get_model_name(response_dict) # type: ignore[unreachable]
+            if model:
+                self._ingest["resource"] = "google." + model
 
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
@@ -211,8 +232,9 @@ class _GoogleVertexRequest(_ProviderRequest):
         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
             self._compute_usage(response_dict, streaming_candidates_characters=self._candiates_character_count)
+            ingest = True
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @staticmethod
     def _is_character_billing_model(model: str) -> bool:
@@ -230,7 +252,7 @@ class _GoogleVertexRequest(_ProviderRequest):
         if id:
             self._ingest["provider_response_id"] = id
 
-        model: Optional[str] = response_dict.get("model_version", None)
+        model: Optional[str] = self._get_model_name(response_dict)
         if model:
             self._ingest["resource"] = "google." + model
 
@@ -256,7 +278,9 @@ class _GoogleVertexRequest(_ProviderRequest):
         prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
         candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
 
-        model: str = response_dict.get("model_version", "")
+        model: Optional[str] = self._get_model_name(response_dict)
+        if not model:
+            model = ""
 
         # for character billing only
         large_context = "" if input < 128000 else "_large_context"
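
Note: the new _model_name fallback handles older vertexai versions whose responses omit model_version; the name is captured from the GenerativeModel instance at request time and used only when the response has nothing better. A sketch of the resolution order (the resource path and model names are illustrative):

    from typing import Any, Optional

    def get_model_name(response: 'dict[str, Any]', cached: Optional[str]) -> Optional[str]:
        # Prefer the response-reported version, fall back to the cached instance name.
        return response.get("model_version") or cached

    # The cached name is the last segment of the fully qualified resource path.
    cached = "projects/p/locations/us/publishers/google/models/gemini-1.0-pro".split('/')[-1]
    assert get_model_name({}, cached) == "gemini-1.0-pro"
    assert get_model_name({"model_version": "gemini-1.5-pro"}, cached) == "gemini-1.5-pro"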
payi/lib/instrument.py CHANGED
@@ -10,6 +10,7 @@ from abc import abstractmethod
 from enum import Enum
 from typing import Any, Set, Union, Callable, Optional, Sequence, TypedDict
 from datetime import datetime, timezone
+from dataclasses import dataclass
 from typing_extensions import deprecated
 
 import nest_asyncio # type: ignore
@@ -28,6 +29,11 @@ from .Stopwatch import Stopwatch
 global _g_logger
 _g_logger: logging.Logger = logging.getLogger("payi.instrument")
 
+@dataclass
+class _ChunkResult:
+    send_chunk_to_caller: bool
+    ingest: bool = False
+
 class _ProviderRequest:
     def __init__(self, instrumentor: '_PayiInstrumentor', category: str, streaming_type: '_StreamingType'):
         self._instrumentor: '_PayiInstrumentor' = instrumentor
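
Note: the ingest default of False keeps the old behavior for providers that have not opted in — every chunk is forwarded and units are only sent when the stream ends. A quick check of the default semantics (assumes payi 0.1.0a83 is installed; _ChunkResult is private API):

    from payi.lib.instrument import _ChunkResult

    default = _ChunkResult(send_chunk_to_caller=True)
    assert default.ingest is False  # no mid-stream ingestion unless a request opts in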
@@ -36,8 +42,8 @@ class _ProviderRequest:
         self._ingest: IngestUnitsParams = { "category": category, "units": {} } # type: ignore
         self._streaming_type: '_StreamingType' = streaming_type
 
-    def process_chunk(self, _chunk: Any) -> bool:
-        return True
+    def process_chunk(self, _chunk: Any) -> _ChunkResult:
+        return _ChunkResult(send_chunk_to_caller=True)
 
     def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Optional[object]: # noqa: ARG002
         return None
@@ -275,8 +281,7 @@ class _PayiInstrumentor:
         if int(ingest_units.get("http_status_code") or 0) < 400:
             units = ingest_units.get("units", {})
             if not units or all(unit.get("input", 0) == 0 and unit.get("output", 0) == 0 for unit in units.values()):
-                self._logger.error('No units to ingest!')
-                return False
+                self._logger.info('ingesting with no token counts')
 
         if self._log_prompt_and_response and self._prompt_and_response_logger:
             response_json = ingest_units.pop("provider_response_json", None)
@@ -341,7 +346,7 @@ class _PayiInstrumentor:
 
             return ingest_response
         except Exception as e:
-            self._logger.error(f"Error Pay-i ingesting request: {e}")
+            self._logger.error(f"Error Pay-i async ingesting: exception {e}, request {ingest_units}")
 
         return None
 
@@ -413,7 +418,7 @@ class _PayiInstrumentor:
             self._logger.error("No payi instance to ingest units")
 
         except Exception as e:
-            self._logger.error(f"Error Pay-i ingesting request: {e}")
+            self._logger.error(f"Error Pay-i ingesting: exception {e}, request {ingest_units}")
 
         return None
 
@@ -1105,6 +1110,8 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
         self._first_token: bool = True
         self._is_bedrock: bool = request.is_bedrock()
         self._bedrock_from_stream: bool = bedrock_from_stream
+        self._ingested: bool = False
+        self._iter_started: bool = False
 
     def __enter__(self) -> Any:
         self._instrumentor._logger.debug(f"StreamIteratorWrapper: __enter__")
@@ -1123,6 +1130,7 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
         await self.__wrapped__.__aexit__(exc_type, exc_val, exc_tb) # type: ignore
 
     def __iter__(self) -> Any:
+        self._iter_started = True
         if self._is_bedrock:
             # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
             self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock __iter__")
@@ -1134,13 +1142,19 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
     def _iter_bedrock(self) -> Any:
         # botocore EventStream doesn't have a __next__ method so iterate over the wrapped object in place
         for event in self.__wrapped__: # type: ignore
+            result: Optional[_ChunkResult] = None
+
             if (self._bedrock_from_stream):
-                self._evaluate_chunk(event)
+                result = self._evaluate_chunk(event)
             else:
                 chunk = event.get('chunk') # type: ignore
                 if chunk:
                     decode = chunk.get('bytes').decode() # type: ignore
-                    self._evaluate_chunk(decode)
+                    result = self._evaluate_chunk(decode)
+
+            if result and result.ingest:
+                self._stop_iteration()
+
             yield event
 
         self._instrumentor._logger.debug(f"StreamIteratorWrapper: bedrock iter finished")
@@ -1148,40 +1162,60 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
         self._stop_iteration()
 
     def __aiter__(self) -> Any:
+        self._iter_started = True
         self._instrumentor._logger.debug(f"StreamIteratorWrapper: __aiter__")
         return self
 
     def __next__(self) -> object:
         try:
             chunk: object = self.__wrapped__.__next__() # type: ignore
+
+            if self._ingested:
+                self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ already ingested, not processing chunk {chunk}")
+                return chunk # type: ignore
+
+            result = self._evaluate_chunk(chunk)
+
+            if result.ingest:
+                self._stop_iteration()
+
+            if result.send_chunk_to_caller:
+                return chunk # type: ignore
+            else:
+                return self.__next__()
         except Exception as e:
             if isinstance(e, StopIteration):
                 self._stop_iteration()
             else:
                 self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ exception {e}")
             raise e
-        else:
-            if self._evaluate_chunk(chunk) == False:
-                return self.__next__()
-
-            return chunk # type: ignore
 
     async def __anext__(self) -> object:
         try:
             chunk: object = await self.__wrapped__.__anext__() # type: ignore
+
+            if self._ingested:
+                self._instrumentor._logger.debug(f"StreamIteratorWrapper: __next__ already ingested, not processing chunk {chunk}")
+                return chunk # type: ignore
+
+            result = self._evaluate_chunk(chunk)
+
+            if result.ingest:
+                await self._astop_iteration()
+
+            if result.send_chunk_to_caller:
+                return chunk # type: ignore
+            else:
+                return await self.__anext__()
+
        except Exception as e:
            if isinstance(e, StopAsyncIteration):
                await self._astop_iteration()
            else:
                self._instrumentor._logger.debug(f"StreamIteratorWrapper: __anext__ exception {e}")
            raise e
-        else:
-            if self._evaluate_chunk(chunk) == False:
-                return await self.__anext__()
 
-            return chunk # type: ignore
-
-    def _evaluate_chunk(self, chunk: Any) -> bool:
+    def _evaluate_chunk(self, chunk: Any) -> _ChunkResult:
         if self._first_token:
             self._request._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
             self._first_token = False
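
Note: restructured this way, the iterator has three behaviors worth calling out: once ingestion has happened, remaining chunks pass through untouched; ingest=True sends units immediately rather than waiting for StopIteration, presumably so usage is recorded even if the caller abandons the stream early; and send_chunk_to_caller=False swallows a chunk and advances to the next one. A schematic of the flow, stripped of the ObjectProxy machinery (a simplified stand-in, not the real implementation):

    def drain(chunks, evaluate):
        # chunks: underlying iterator; evaluate: returns a _ChunkResult-like object
        ingested = False
        for chunk in chunks:
            if not ingested:
                result = evaluate(chunk)
                if result.ingest:
                    ingested = True  # stand-in for _stop_iteration()
                if not result.send_chunk_to_caller:
                    continue  # swallow, e.g. the synthetic OpenAI usage packet
            yield chunk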
@@ -1192,7 +1226,7 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
         return self._request.process_chunk(chunk)
 
     def _process_stop_iteration(self) -> None:
-        self._instrumentor._logger.debug(f"StreamIteratorWrapper: stop iteration")
+        self._instrumentor._logger.debug(f"StreamIteratorWrapper: process stop iteration")
 
         self._stopwatch.stop()
         self._request._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
@@ -1202,12 +1236,23 @@ class _StreamIteratorWrapper(ObjectProxy): # type: ignore
         self._request._ingest["provider_response_json"] = self._responses
 
     async def _astop_iteration(self) -> None:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"StreamIteratorWrapper: astop iteration already ingested, skipping")
+            return
+
         self._process_stop_iteration()
+
         await self._instrumentor._aingest_units(self._request._ingest)
+        self._ingested = True
 
     def _stop_iteration(self) -> None:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"StreamIteratorWrapper: stop iteration already ingested, skipping")
+            return
+
         self._process_stop_iteration()
         self._instrumentor._ingest_units(self._request._ingest)
+        self._ingested = True
 
     @staticmethod
     def chunk_to_json(chunk: Any) -> str:
@@ -1241,7 +1286,6 @@ class _StreamManagerWrapper(ObjectProxy): # type: ignore
         self._responses: list[str] = []
         self._request: _ProviderRequest = request
         self._first_token: bool = True
-        self._done: bool = False
 
     def __enter__(self) -> _StreamIteratorWrapper:
         self._instrumentor._logger.debug(f"_StreamManagerWrapper: __enter__")
@@ -1275,92 +1319,103 @@ class _GeneratorWrapper: # type: ignore
         self._responses: list[str] = []
         self._request: _ProviderRequest = request
         self._first_token: bool = True
-        self._done: bool = False
-
+        self._ingested: bool = False
+        self._iter_started: bool = False
+
     def __iter__(self) -> Any:
+        self._iter_started = True
         self._instrumentor._logger.debug(f"GeneratorWrapper: __iter__")
         return self
 
     def __aiter__(self) -> Any:
         self._instrumentor._logger.debug(f"GeneratorWrapper: __aiter__")
         return self
-
-    def __next__(self) -> Any:
-        if self._done:
-            raise StopIteration
+
+    def _process_chunk(self, chunk: Any) -> _ChunkResult:
+        if self._first_token:
+            self._request._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
+            self._first_token = False
 
+        if self._log_prompt_and_response:
+            dict = self._chunk_to_dict(chunk)
+            self._responses.append(json.dumps(dict))
+
+        return self._request.process_chunk(chunk)
+
+    def __next__(self) -> Any:
         try:
             chunk = next(self._generator)
-            return self._process_chunk(chunk)
+            result = self._process_chunk(chunk)
+
+            if result.ingest:
+                self._stop_iteration()
+
+            # ignore result.send_chunk_to_caller:
+            return chunk
 
         except Exception as e:
             if isinstance(e, StopIteration):
-                self._process_stop_iteration()
+                self._stop_iteration()
             else:
                 self._instrumentor._logger.debug(f"GeneratorWrapper: __next__ exception {e}")
             raise e
 
     async def __anext__(self) -> Any:
-        if self._done:
-            raise StopAsyncIteration
-
         try:
             chunk = await anext(self._generator) # type: ignore
-            return self._process_chunk(chunk)
+            result = self._process_chunk(chunk)
+
+            if result.ingest:
+                await self._astop_iteration()
+
+            # ignore result.send_chunk_to_caller:
+            return chunk # type: ignore
 
         except Exception as e:
             if isinstance(e, StopAsyncIteration):
-                await self._process_async_stop_iteration()
+                await self._astop_iteration()
             else:
                 self._instrumentor._logger.debug(f"GeneratorWrapper: __anext__ exception {e}")
             raise e
 
     @staticmethod
     def _chunk_to_dict(chunk: Any) -> 'dict[str, object]':
-        if hasattr(chunk, "to_json"):
-            return chunk.to_json() # type: ignore
+        if hasattr(chunk, "to_dict"):
+            return chunk.to_dict() # type: ignore
         elif hasattr(chunk, "to_json_dict"):
             return chunk.to_json_dict() # type: ignore
         else:
             return {}
 
-    def _process_chunk(self, chunk: Any) -> Any:
-        if self._first_token:
-            self._request._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
-            self._first_token = False
-
-        if self._log_prompt_and_response:
-            dict = self._chunk_to_dict(chunk)
-            self._responses.append(json.dumps(dict))
-
-        self._request.process_chunk(chunk)
-        return chunk
+    def _stop_iteration(self) -> None:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"GeneratorWrapper: stop iteration already ingested, skipping")
+            return
 
-    def _process_stop_iteration(self) -> None:
-        self._instrumentor._logger.debug(f"GeneratorWrapper: stop iteration")
+        self._process_stop_iteration()
 
-        self._stopwatch.stop()
-        self._request._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
-        self._request._ingest["http_status_code"] = 200
-
-        if self._log_prompt_and_response:
-            self._request._ingest["provider_response_json"] = self._responses
-
         self._instrumentor._ingest_units(self._request._ingest)
-        self._done = True
+        self._ingested = True
 
-    async def _process_async_stop_iteration(self) -> None:
-        self._instrumentor._logger.debug(f"GeneratorWrapper: async stop iteration")
+    async def _astop_iteration(self) -> None:
+        if self._ingested:
+            self._instrumentor._logger.debug(f"GeneratorWrapper: astop iteration already ingested, skipping")
+            return
+
+        self._process_stop_iteration()
 
-        self._stopwatch.stop()
+        await self._instrumentor._aingest_units(self._request._ingest)
+        self._ingested = True
+
+    def _process_stop_iteration(self) -> None:
+        self._instrumentor._logger.debug(f"GeneratorWrapper: stop iteration")
+
+        self._stopwatch.stop()
         self._request._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
         self._request._ingest["http_status_code"] = 200
 
         if self._log_prompt_and_response:
             self._request._ingest["provider_response_json"] = self._responses
-
-        await self._instrumentor._aingest_units(self._request._ingest)
-        self._done = True
 
 global _instrumentor
 _instrumentor: Optional[_PayiInstrumentor] = None
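
Note: beyond the ingestion rework, _chunk_to_dict also fixes a type bug — to_json on provider chunk models returns a JSON string, not a dict, so the earlier code handed json.dumps a pre-encoded string and double-encoded the logged responses. An illustrative reproduction (the Chunk class here is made up):

    import json

    class Chunk:
        def to_json(self) -> str:
            return '{"text": "hi"}'
        def to_dict(self) -> 'dict[str, object]':
            return {"text": "hi"}

    chunk = Chunk()
    assert json.dumps(chunk.to_json()) == '"{\\"text\\": \\"hi\\"}"'  # double-encoded string
    assert json.dumps(chunk.to_dict()) == '{"text": "hi"}'           # intended shape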
@@ -1630,4 +1685,4 @@ def proxy(
 
         return _proxy_wrapper
 
-    return _proxy
+    return _proxy
payi-0.1.0a83.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: payi
-Version: 0.1.0a82
+Version: 0.1.0a83
 Summary: The official Python library for the payi API
 Project-URL: Homepage, https://github.com/Pay-i/pay-i-python
 Project-URL: Repository, https://github.com/Pay-i/pay-i-python
payi-0.1.0a83.dist-info/RECORD CHANGED
@@ -11,7 +11,7 @@ payi/_resource.py,sha256=j2jIkTr8OIC8sU6-05nxSaCyj4MaFlbZrwlyg4_xJos,1088
 payi/_response.py,sha256=rh9oJAvCKcPwQFm4iqH_iVrmK8bNx--YP_A2a4kN1OU,28776
 payi/_streaming.py,sha256=Z_wIyo206T6Jqh2rolFg2VXZgX24PahLmpURp0-NssU,10092
 payi/_types.py,sha256=7jE5MoQQFVoVxw5vVzvZ2Ao0kcjfNOGsBgyJfLBEnMo,6195
-payi/_version.py,sha256=KAmqXUJQtR0NFE11hO5_6a9OQxaHRxsEAfoNmO8-neY,165
+payi/_version.py,sha256=xOYzE4HfPmVs-tN-N7yfhmNSvAgnTtRTnr0gUxo0tVg,165
 payi/pagination.py,sha256=k2356QGPOUSjRF2vHpwLBdF6P-2vnQzFfRIJQAHGQ7A,1258
 payi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 payi/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
@@ -25,14 +25,14 @@ payi/_utils/_transform.py,sha256=n7kskEWz6o__aoNvhFoGVyDoalNe6mJwp-g7BWkdj88,156
 payi/_utils/_typing.py,sha256=D0DbbNu8GnYQTSICnTSHDGsYXj8TcAKyhejb0XcnjtY,4602
 payi/_utils/_utils.py,sha256=ts4CiiuNpFiGB6YMdkQRh2SZvYvsl7mAF-JWHCcLDf4,12312
 payi/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
-payi/lib/AnthropicInstrumentor.py,sha256=PNSrEsij-rRaRN0_rjqQI00NK_FahrrRN4gikpDiiTc,9186
-payi/lib/BedrockInstrumentor.py,sha256=qXkrYeYHba3gOnp_VnQ6sMAUVLx0RpeSJ8gMWz7-A2g,15136
-payi/lib/GoogleGenAiInstrumentor.py,sha256=WE_3tyrp96UDHXymY4ky28wtFTROF-v5mSzOP2XuGxw,14489
-payi/lib/OpenAIInstrumentor.py,sha256=hrEEzzeGtQmZfLxGsyMKhDtB0S7YPNfsjgMG5vx5jMA,18485
+payi/lib/AnthropicInstrumentor.py,sha256=O48WQIK1WvjAz1lvIEYoqgnS-UmzKhb5becNpSjinbE,9515
+payi/lib/BedrockInstrumentor.py,sha256=vtJoPsYJ8Re3ODe3onTPcq9nSblL7IBjnE811qhMkdU,15424
+payi/lib/GoogleGenAiInstrumentor.py,sha256=ru2odN7aA66z_UGAHRpHsJ1dm5HbffQ0eFhcAHOqHR4,14610
+payi/lib/OpenAIInstrumentor.py,sha256=FjIRlQk5t95ySspH0VXsBv7my_f-c6HluvReY2hRvmM,18852
 payi/lib/Stopwatch.py,sha256=7OJlxvr2Jyb6Zr1LYCYKczRB7rDVKkIR7gc4YoleNdE,764
-payi/lib/VertexInstrumentor.py,sha256=E0511pzzB5e3xY7xNSq_gn2BERnnWRPWkOx4tyqvQ3A,12779
+payi/lib/VertexInstrumentor.py,sha256=lF1BcKKCQdHOMn563e1OdzeDfhjLhPyhZSyM3PycZjA,13881
 payi/lib/helpers.py,sha256=K1KAfWrpPT1UUGNxspLe1lHzQjP3XV5Pkh9IU4pKMok,4624
-payi/lib/instrument.py,sha256=w55jtWm6PoIJqAPEeRJXWc41QTuUxzgQB9BxIdFYMdU,64622
+payi/lib/instrument.py,sha256=qgV6f-Z2Egl1WsjTDyxg-2Unx6iIizhV4DPi44qPEAY,66360
 payi/resources/__init__.py,sha256=1rtrPLWbNt8oJGOp6nwPumKLJ-ftez0B6qwLFyfcoP4,2972
 payi/resources/ingest.py,sha256=8HNHEyfgIyJNqCh0rOhO9msoc61-8IyifJ6AbxjCrDg,22612
 payi/resources/categories/__init__.py,sha256=w5gMiPdBSzJA_qfoVtFBElaoe8wGf_O63R7R1Spr6Gk,1093
@@ -142,7 +142,7 @@ payi/types/use_cases/definitions/kpi_retrieve_response.py,sha256=uQXliSvS3k-yDYw
 payi/types/use_cases/definitions/kpi_update_params.py,sha256=jbawdWAdMnsTWVH0qfQGb8W7_TXe3lq4zjSRu44d8p8,373
 payi/types/use_cases/definitions/kpi_update_response.py,sha256=zLyEoT0S8d7XHsnXZYT8tM7yDw0Aze0Mk-_Z6QeMtc8,459
 payi/types/use_cases/definitions/limit_config_create_params.py,sha256=pzQza_16N3z8cFNEKr6gPbFvuGFrwNuGxAYb--Kbo2M,449
-payi-0.1.0a82.dist-info/METADATA,sha256=mD2GWPPSx1-aeLthkLrXEp3Ng4-TBjMN6mDMIljfxkc,15180
-payi-0.1.0a82.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
-payi-0.1.0a82.dist-info/licenses/LICENSE,sha256=CQt03aM-P4a3Yg5qBg3JSLVoQS3smMyvx7tYg_6V7Gk,11334
-payi-0.1.0a82.dist-info/RECORD,,
+payi-0.1.0a83.dist-info/METADATA,sha256=uY2n8H_NGmAxXXCm6a6yFC6HXxiuAl3I6RvYn5W2FcU,15180
+payi-0.1.0a83.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+payi-0.1.0a83.dist-info/licenses/LICENSE,sha256=CQt03aM-P4a3Yg5qBg3JSLVoQS3smMyvx7tYg_6V7Gk,11334
+payi-0.1.0a83.dist-info/RECORD,,