payi 0.1.0a64__py3-none-any.whl → 0.1.0a65__py3-none-any.whl
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +60 -60
- payi/lib/BedrockInstrumentor.py +102 -92
- payi/lib/OpenAIInstrumentor.py +90 -58
- payi/lib/instrument.py +76 -75
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/METADATA +1 -1
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/RECORD +9 -9
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/WHEEL +0 -0
- {payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/licenses/LICENSE +0 -0
payi/lib/AnthropicInstrumentor.py
CHANGED
@@ -1,13 +1,13 @@
 import logging
 from typing import Any, Union
+from typing_extensions import override

 import tiktoken
 from wrapt import wrap_function_wrapper  # type: ignore

-from payi.types import IngestUnitsParams
 from payi.types.ingest_units_params import Units

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class AnthropicIntrumentor:
@@ -55,9 +55,7 @@ def chat_wrapper(
     ) -> Any:
         return instrumentor.chat_wrapper(
             "system.anthropic",
-
-            process_request,
-            process_synchronous_response,
+            _AnthropicProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -75,9 +73,7 @@ async def achat_wrapper(
     ) -> Any:
         return await instrumentor.achat_wrapper(
             "system.anthropic",
-
-            process_request,
-            process_synchronous_response,
+            _AnthropicProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -85,17 +81,39 @@ async def achat_wrapper(
             kwargs,
         )

+class _AnthropicProviderRequest(_ProviderRequest):
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        if chunk.type == "message_start":
+            self._ingest["provider_response_id"] = chunk.message.id

-
-
-            ingest["provider_response_id"] = chunk.message.id
+            usage = chunk.message.usage
+            units = self._ingest["units"]

-
-            units = ingest["units"]
+            input = _PayiInstrumentor.update_for_vision(usage.input_tokens, units, self._estimated_prompt_tokens)

-
+            units["text"] = Units(input=input, output=0)

-
+            if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
+                text_cache_write = usage.cache_creation_input_tokens
+                units["text_cache_write"] = Units(input=text_cache_write, output=0)
+
+            if hasattr(usage, "cache_read_input_tokens") and usage.cache_read_input_tokens > 0:
+                text_cache_read = usage.cache_read_input_tokens
+                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+
+        elif chunk.type == "message_delta":
+            usage = chunk.usage
+            self._ingest["units"]["text"]["output"] = usage.output_tokens
+
+        return True
+
+    @override
+    def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Any:
+        usage = response.usage
+        input = usage.input_tokens
+        output = usage.output_tokens
+        units: dict[str, Units] = self._ingest["units"]

         if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
             text_cache_write = usage.cache_creation_input_tokens
@@ -105,35 +123,37 @@ def process_chunk(chunk: Any, ingest: IngestUnitsParams) -> None:
             text_cache_read = usage.cache_read_input_tokens
             units["text_cache_read"] = Units(input=text_cache_read, output=0)

-
-        usage = chunk.usage
-        ingest["units"]["text"]["output"] = usage.output_tokens
-
-
-def process_synchronous_response(response: Any, ingest: IngestUnitsParams, log_prompt_and_response: bool, *args: Any, **kwargs: 'dict[str, Any]') -> Any:  # noqa: ARG001
-    usage = response.usage
-    input = usage.input_tokens
-    output = usage.output_tokens
-    units: dict[str, Units] = ingest["units"]
-
-    if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens > 0:
-        text_cache_write = usage.cache_creation_input_tokens
-        units["text_cache_write"] = Units(input=text_cache_write, output=0)
+        input = _PayiInstrumentor.update_for_vision(input, units, self._estimated_prompt_tokens)

-
-        text_cache_read = usage.cache_read_input_tokens
-        units["text_cache_read"] = Units(input=text_cache_read, output=0)
+        units["text"] = Units(input=input, output=output)

-
+        if log_prompt_and_response:
+            self._ingest["provider_response_json"] = response.to_json()
+
+        self._ingest["provider_response_id"] = response.id
+
+        return None

-
+    @override
+    def process_request(self, kwargs: Any) -> None:
+        messages = kwargs.get("messages")
+        if not messages or len(messages) == 0:
+            return
+
+        estimated_token_count = 0
+        has_image = False

-
-
-
-
-
-
+        enc = tiktoken.get_encoding("cl100k_base")
+
+        for message in messages:
+            msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
+            if msg_has_image:
+                has_image = True
+                estimated_token_count += msg_prompt_tokens
+
+        if not has_image or estimated_token_count == 0:
+            return
+        self._estimated_prompt_tokens = estimated_token_count

 def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'list[Any]']) -> 'tuple[bool, int]':
     if isinstance(content, str):
@@ -146,23 +166,3 @@ def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'li
     token_count = sum(len(encoding.encode(item.get("text", ""))) for item in content if item.get("type") == "text")
     return has_image, token_count

-def process_request(ingest: IngestUnitsParams, *args: Any, **kwargs: Any) -> None:  # noqa: ARG001
-    messages = kwargs.get("messages")
-    if not messages or len(messages) == 0:
-        return
-
-    estimated_token_count = 0
-    has_image = False
-
-    enc = tiktoken.get_encoding("cl100k_base")
-
-    for message in messages:
-        msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
-        if msg_has_image:
-            has_image = True
-            estimated_token_count += msg_prompt_tokens
-
-    if not has_image or estimated_token_count == 0:
-        return
-
-    ingest["units"][_PayiInstrumentor.estimated_prompt_tokens] = Units(input=estimated_token_count, output=0)
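The net effect in this file: the free functions process_request, process_chunk, and process_synchronous_response become methods of _AnthropicProviderRequest, so per-request state (_ingest, _estimated_prompt_tokens) lives on the object instead of being threaded through arguments. A minimal sketch of the streaming bookkeeping, using plain dicts and SimpleNamespace stand-ins for Anthropic's stream events; the ids and token counts are invented, and the vision adjustment via update_for_vision is omitted:

# Illustrative only: how process_chunk accumulates usage across a stream.
from types import SimpleNamespace

ingest = {"units": {}}

# "message_start" carries the provider response id and the prompt-side usage.
start = SimpleNamespace(
    type="message_start",
    message=SimpleNamespace(
        id="msg_123",  # made-up response id
        usage=SimpleNamespace(input_tokens=42, cache_read_input_tokens=10),
    ),
)
ingest["provider_response_id"] = start.message.id
ingest["units"]["text"] = {"input": start.message.usage.input_tokens, "output": 0}
if getattr(start.message.usage, "cache_read_input_tokens", 0) > 0:
    ingest["units"]["text_cache_read"] = {"input": start.message.usage.cache_read_input_tokens, "output": 0}

# "message_delta" closes the stream with the completion-side token count.
delta = SimpleNamespace(type="message_delta", usage=SimpleNamespace(output_tokens=7))
ingest["units"]["text"]["output"] = delta.usage.output_tokens

print(ingest)
# {'units': {'text': {'input': 42, 'output': 7}, 'text_cache_read': {'input': 10, 'output': 0}}, 'provider_response_id': 'msg_123'}

Cache-write tokens are recorded the same way when cache_creation_input_tokens is present on the usage object.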
payi/lib/BedrockInstrumentor.py
CHANGED
@@ -2,13 +2,14 @@ import json
 import logging
 from typing import Any
 from functools import wraps
+from typing_extensions import override

 from wrapt import ObjectProxy, wrap_function_wrapper  # type: ignore

 from payi.types.ingest_units_params import Units, IngestUnitsParams
 from payi.types.pay_i_common_models_api_router_header_info_param import PayICommonModelsAPIRouterHeaderInfoParam

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class BedrockInstrumentor:
@@ -103,9 +104,7 @@ def wrap_invoke(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
         if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_invoke_request,
-                process_synchronous_invoke_response,
+                _BedrockInvokeSynchronousProviderRequest(instrumentor),
                 _IsStreaming.false,
                 wrapped,
                 None,
@@ -119,14 +118,12 @@ def wrap_invoke(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
 def wrap_invoke_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     @wraps(wrapped)
     def invoke_wrapper(*args: Any, **kwargs: Any) -> Any:
-
+        model_id: str = kwargs.get("modelId", "")  # type: ignore

-        if
+        if model_id.startswith("meta.llama3") or model_id.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_invoke_request,
-                None,
+                _BedrockInvokeStreamingProviderRequest(instrumentor, model_id),
                 _IsStreaming.true,
                 wrapped,
                 None,
@@ -145,9 +142,7 @@ def wrap_converse(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
         if modelId.startswith("meta.llama3") or modelId.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_converse_request,
-                process_synchronous_converse_response,
+                _BedrockConverseSynchronousProviderRequest(instrumentor),
                 _IsStreaming.false,
                 wrapped,
                 None,
@@ -161,14 +156,12 @@ def wrap_converse(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
 def wrap_converse_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:
     @wraps(wrapped)
     def invoke_wrapper(*args: Any, **kwargs: Any) -> Any:
-
+        model_id: str = kwargs.get("modelId", "")  # type: ignore

-        if
+        if model_id.startswith("meta.llama3") or model_id.startswith("anthropic."):
             return instrumentor.chat_wrapper(
                 "system.aws.bedrock",
-
-                process_converse_request,
-                None,
+                _BedrockConverseStreamingProviderRequest(instrumentor),
                 _IsStreaming.true,
                 wrapped,
                 None,
@@ -179,104 +172,121 @@ def wrap_converse_stream(instrumentor: _PayiInstrumentor, wrapped: Any) -> Any:

     return invoke_wrapper

-
-
-
+class _BedrockInvokeStreamingProviderRequest(_ProviderRequest):
+    def __init__(self, instrumentor: _PayiInstrumentor, model_id: str):
+        super().__init__(instrumentor)
+        self._is_anthropic: bool = model_id.startswith("anthropic.")

-
-
-
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        if self._is_anthropic:
+            return self.process_invoke_streaming_anthropic_chunk(chunk)
+        else:
+            return self.process_invoke_streaming_llama_chunk(chunk)

-
+    def process_invoke_streaming_anthropic_chunk(self, chunk: str) -> bool:
+        chunk_dict = json.loads(chunk)
+        type = chunk_dict.get("type", "")

-
+        if type == "message_start":
+            usage = chunk_dict['message']['usage']
+            units = self._ingest["units"]

-
-            if text_cache_write > 0:
-                units["text_cache_write"] = Units(input=text_cache_write, output=0)
+            input = _PayiInstrumentor.update_for_vision(usage['input_tokens'], units, self._estimated_prompt_tokens)

-
-            if text_cache_read > 0:
-                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+            units["text"] = Units(input=input, output=0)

-
-
-
+            text_cache_write: int = usage.get("cache_creation_input_tokens", 0)
+            if text_cache_write > 0:
+                units["text_cache_write"] = Units(input=text_cache_write, output=0)

-
-
-
-
-
-
-
-
-
+            text_cache_read: int = usage.get("cache_read_input_tokens", 0)
+            if text_cache_read > 0:
+                units["text_cache_read"] = Units(input=text_cache_read, output=0)
+
+        elif type == "message_delta":
+            usage = chunk_dict['usage']
+            self._ingest["units"]["text"]["output"] = usage['output_tokens']
+
+        return True
+
+    def process_invoke_streaming_llama_chunk(self, chunk: str) -> bool:
+        chunk_dict = json.loads(chunk)
+        metrics = chunk_dict.get("amazon-bedrock-invocationMetrics", {})
+        if metrics:
+            input = metrics.get("inputTokenCount", 0)
+            output = metrics.get("outputTokenCount", 0)
+            self._ingest["units"]["text"] = Units(input=input, output=output)
+
+        return True
+
+class _BedrockInvokeSynchronousProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
         response: Any,
-        ingest: IngestUnitsParams,
         log_prompt_and_response: bool,
-
-        **kargs: Any) -> Any:  # noqa: ARG001
+        kwargs: Any) -> Any:

-
+        metadata = response.get("ResponseMetadata", {})

-
-
-
+        request_id = metadata.get("RequestId", "")
+        if request_id:
+            self._ingest["provider_response_id"] = request_id

-
-
-
+        response_headers = metadata.get("HTTPHeaders", {}).copy()
+        if response_headers:
+            self._ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]

-
-
-
-
-
+        response["body"] = InvokeResponseWrapper(
+            response=response["body"],
+            instrumentor=self._instrumentor,
+            ingest=self._ingest,
+            log_prompt_and_response=log_prompt_and_response)

-
+        return response

-
-
-
-
-
+class _BedrockConverseSynchronousProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
+        response: 'dict[str, Any]',
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:

-
-        usage = metadata['usage']
+        usage = response["usage"]
         input = usage["inputTokens"]
         output = usage["outputTokens"]
-
+
+        units: dict[str, Units] = self._ingest["units"]
+        units["text"] = Units(input=input, output=output)

-
-        response: 'dict[str, Any]',
-        ingest: IngestUnitsParams,
-        log_prompt_and_response: bool,
-        **kargs: Any) -> Any:  # noqa: ARG001
+        metadata = response.get("ResponseMetadata", {})

-
-
-
-
-        units: dict[str, Units] = ingest["units"]
-        units["text"] = Units(input=input, output=output)
+        request_id = metadata.get("RequestId", "")
+        if request_id:
+            self._ingest["provider_response_id"] = request_id

-
+        response_headers = metadata.get("HTTPHeaders", {})
+        if response_headers:
+            self._ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]

-
-
-
+        if log_prompt_and_response:
+            response_without_metadata = response.copy()
+            response_without_metadata.pop("ResponseMetadata", None)
+            self._ingest["provider_response_json"] = json.dumps(response_without_metadata)

-
-        if response_headers:
-            ingest["provider_response_headers"] = [PayICommonModelsAPIRouterHeaderInfoParam(name=k, value=v) for k, v in response_headers.items()]
+        return None

-
-
-
-
+class _BedrockConverseStreamingProviderRequest(_ProviderRequest):
+    @override
+    def process_chunk(self, chunk: 'dict[str, Any]') -> bool:
+        metadata = chunk.get("metadata", {})

-
+        if metadata:
+            usage = metadata['usage']
+            input = usage["inputTokens"]
+            output = usage["outputTokens"]
+            self._ingest["units"]["text"] = Units(input=input, output=output)

-
-        return
+        return True
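The Bedrock changes split streaming-invoke handling by model family: _BedrockInvokeStreamingProviderRequest parses Anthropic-style "message_start"/"message_delta" events, while Llama 3 responses report their totals in a trailing "amazon-bedrock-invocationMetrics" block. A minimal sketch of the two chunk shapes; the sample payloads are invented, but the field names follow the code above:

# Illustrative only: extracting token counts from the two invoke-stream chunk formats.
import json

def tokens_from_invoke_chunk(chunk: str, is_anthropic: bool) -> "tuple[int, int]":
    """Return (input_tokens, output_tokens) reported by one streamed chunk; zeros if absent."""
    d = json.loads(chunk)
    if is_anthropic:
        if d.get("type") == "message_start":
            return d["message"]["usage"]["input_tokens"], 0
        if d.get("type") == "message_delta":
            return 0, d["usage"]["output_tokens"]
        return 0, 0
    metrics = d.get("amazon-bedrock-invocationMetrics", {})
    return metrics.get("inputTokenCount", 0), metrics.get("outputTokenCount", 0)

print(tokens_from_invoke_chunk('{"type": "message_start", "message": {"usage": {"input_tokens": 42}}}', True))   # (42, 0)
print(tokens_from_invoke_chunk('{"amazon-bedrock-invocationMetrics": {"inputTokenCount": 42, "outputTokenCount": 7}}', False))  # (42, 7)

The converse-stream path is simpler: usage arrives in a single "metadata" chunk, which _BedrockConverseStreamingProviderRequest.process_chunk reads directly.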
payi/lib/OpenAIInstrumentor.py
CHANGED
@@ -1,6 +1,7 @@
 import json
 import logging
-from typing import Any, Union
+from typing import Any, Union, Optional
+from typing_extensions import override
 from importlib.metadata import version

 import tiktoken  # type: ignore
@@ -9,7 +10,7 @@ from wrapt import wrap_function_wrapper  # type: ignore
 from payi.types import IngestUnitsParams
 from payi.types.ingest_units_params import Units

-from .instrument import _IsStreaming, _PayiInstrumentor
+from .instrument import _IsStreaming, _ProviderRequest, _PayiInstrumentor


 class OpenAiInstrumentor:
@@ -63,9 +64,7 @@ def embeddings_wrapper(
     ) -> Any:
         return instrumentor.chat_wrapper(
             "system.openai",
-
-            None,  # process_chat_request,
-            process_ebmeddings_synchronous_response,
+            _OpenAiEmbeddingsProviderRequest(instrumentor),
             _IsStreaming.false,
             wrapped,
             instance,
@@ -83,9 +82,7 @@ async def aembeddings_wrapper(
     ) -> Any:
         return await instrumentor.achat_wrapper(
             "system.openai",
-
-            None,  # process_chat_request,
-            process_ebmeddings_synchronous_response,
+            _OpenAiEmbeddingsProviderRequest(instrumentor),
             _IsStreaming.false,
             wrapped,
             instance,
@@ -103,9 +100,7 @@ def chat_wrapper(
     ) -> Any:
         return instrumentor.chat_wrapper(
             "system.openai",
-
-            process_chat_request,
-            process_chat_synchronous_response,
+            _OpenAiChatProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -123,9 +118,7 @@ async def achat_wrapper(
     ) -> Any:
         return await instrumentor.achat_wrapper(
             "system.openai",
-
-            process_chat_request,
-            process_chat_synchronous_response,
+            _OpenAiChatProviderRequest(instrumentor),
             _IsStreaming.kwargs,
             wrapped,
             instance,
@@ -133,13 +126,89 @@ async def achat_wrapper(
             kwargs,
         )

-
-
+class _OpenAiEmbeddingsProviderRequest(_ProviderRequest):
+    @override
+    def process_synchronous_response(
+        self,
+        response: Any,
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:
+        return process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+class _OpenAiChatProviderRequest(_ProviderRequest):
+    def __init__(self, instrumentor: _PayiInstrumentor):
+        super().__init__(instrumentor)
+        self._include_usage_added = False
+
+    @override
+    def process_chunk(self, chunk: Any) -> bool:
+        model = model_to_dict(chunk)
+
+        if "provider_response_id" not in self._ingest:
+            response_id = model.get("id", None)
+            if response_id:
+                self._ingest["provider_response_id"] = response_id
+
+        send_chunk_to_client = True
+
+        usage = model.get("usage")
+        if usage:
+            add_usage_units(usage, self._ingest["units"], self._estimated_prompt_tokens)
+
+            # If we aded "include_usage" in the request on behalf of the client, do not return the extra
+            # packet which contains the usage to the client as they are not expecting the data
+            if self._include_usage_added:
+                send_chunk_to_client = False

-
+        return send_chunk_to_client
+
+    @override
+    def process_request(self, kwargs: Any) -> None:  # noqa: ARG001
+        messages = kwargs.get("messages", None)
+        if not messages or len(messages) == 0:
+            return
+
+        estimated_token_count = 0
+        has_image = False
+
+        try:
+            enc = tiktoken.encoding_for_model(kwargs.get("model"))  # type: ignore
+        except KeyError:
+            enc = tiktoken.get_encoding("o200k_base")  # type: ignore
+
+        for message in messages:
+            msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
+            if msg_has_image:
+                has_image = True
+                estimated_token_count += msg_prompt_tokens
+
+        if has_image and estimated_token_count > 0:
+            self._estimated_prompt_tokens = estimated_token_count
+
+        stream: bool = kwargs.get("stream", False)
+        if stream:
+            add_include_usage = True
+
+            stream_options: dict[str, Any] = kwargs.get("stream_options", None)
+            if stream_options and "include_usage" in stream_options:
+                add_include_usage = stream_options["include_usage"] == False
+
+            if add_include_usage:
+                kwargs['stream_options'] = {"include_usage": True}
+                self._include_usage_added = True
+
+    @override
+    def process_synchronous_response(
+        self,
+        response: Any,
+        log_prompt_and_response: bool,
+        kwargs: Any) -> Any:
+        process_chat_synchronous_response(response, self._ingest, log_prompt_and_response, self._estimated_prompt_tokens)
+
+def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams, log_prompt_and_response: bool, estimated_prompt_tokens: Optional[int]) -> Any:
     response_dict = model_to_dict(response)

-    add_usage_units(response_dict.get("usage", {}), ingest["units"])
+    add_usage_units(response_dict.get("usage", {}), ingest["units"], estimated_prompt_tokens)

     if log_prompt_and_response:
         ingest["provider_response_json"] = [json.dumps(response_dict)]
@@ -149,19 +218,6 @@ def process_chat_synchronous_response(response: str, ingest: IngestUnitsParams,

     return None

-def process_chat_chunk(chunk: Any, ingest: IngestUnitsParams) -> None:
-    model = model_to_dict(chunk)
-
-    if "provider_response_id" not in ingest:
-        response_id = model.get("id", None)
-        if response_id:
-            ingest["provider_response_id"] = response_id
-
-    usage = model.get("usage")
-    if usage:
-        add_usage_units(usage, ingest["units"])
-
-
 def model_to_dict(model: Any) -> Any:
     if version("pydantic") < "2.0.0":
         return model.dict()
@@ -173,7 +229,7 @@ def model_to_dict(model: Any) -> Any:
     return model


-def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
+def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]", estimated_prompt_tokens: Optional[int]) -> None:
     input = usage["prompt_tokens"] if "prompt_tokens" in usage else 0
     output = usage["completion_tokens"] if "completion_tokens" in usage else 0
     input_cache = 0
@@ -184,7 +240,7 @@ def add_usage_units(usage: "dict[str, Any]", units: "dict[str, Units]") -> None:
     if input_cache != 0:
         units["text_cache_read"] = Units(input=input_cache, output=0)

-    input = _PayiInstrumentor.update_for_vision(input - input_cache, units)
+    input = _PayiInstrumentor.update_for_vision(input - input_cache, units, estimated_prompt_tokens)

     units["text"] = Units(input=input, output=output)

@@ -197,28 +253,4 @@ def has_image_and_get_texts(encoding: tiktoken.Encoding, content: Union[str, 'li
         return has_image, 0

     token_count = sum(len(encoding.encode(item.get("text", ""))) for item in content if item.get("type") == "text")
-    return has_image, token_count
-
-def process_chat_request(ingest: IngestUnitsParams, *args: Any, **kwargs: Any) -> None:  # noqa: ARG001
-    messages = kwargs.get("messages")
-    if not messages or len(messages) == 0:
-        return
-
-    estimated_token_count = 0
-    has_image = False
-
-    try:
-        enc = tiktoken.encoding_for_model(kwargs.get("model"))  # type: ignore
-    except KeyError:
-        enc = tiktoken.get_encoding("o200k_base")  # type: ignore
-
-    for message in messages:
-        msg_has_image, msg_prompt_tokens = has_image_and_get_texts(enc, message.get('content', ''))
-        if msg_has_image:
-            has_image = True
-            estimated_token_count += msg_prompt_tokens
-
-    if not has_image or estimated_token_count == 0:
-        return
-
-    ingest["units"][_PayiInstrumentor.estimated_prompt_tokens] = Units(input=estimated_token_count, output=0)
+    return has_image, token_count
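Besides the class refactor, _OpenAiChatProviderRequest.process_request now opts streamed requests into usage reporting: when the caller did not set stream_options={"include_usage": True} themselves, it is injected, and process_chunk later returns False for the resulting usage-only chunk so the stream wrapper swallows it rather than surfacing a packet the caller never asked for. A sketch of that decision restated as a free function; the request dict is hypothetical and edge cases in the original (an explicit include_usage=False is also overridden) are simplified:

# Illustrative only: the stream_options decision, extracted from process_request.
from typing import Any

def ensure_include_usage(kwargs: "dict[str, Any]") -> bool:
    """Return True when include_usage was injected on the caller's behalf,
    meaning the trailing usage-only chunk must be hidden from the caller."""
    if not kwargs.get("stream", False):
        return False
    stream_options = kwargs.get("stream_options") or {}
    if stream_options.get("include_usage") is True:
        return False  # the caller asked for usage; pass the extra chunk through
    kwargs["stream_options"] = {"include_usage": True}
    return True

request = {"model": "gpt-4o", "stream": True, "messages": []}  # hypothetical request
print(ensure_include_usage(request), request["stream_options"])  # True {'include_usage': True}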
payi/lib/instrument.py
CHANGED
@@ -21,6 +21,21 @@ from .helpers import PayiCategories
 from .Stopwatch import Stopwatch


+class _ProviderRequest:
+    def __init__(self, instrumentor: '_PayiInstrumentor'):
+        self._instrumentor: '_PayiInstrumentor' = instrumentor
+        self._estimated_prompt_tokens: Optional[int] = None
+        self._ingest: IngestUnitsParams
+
+    def process_request(self, _kwargs: Any) -> None:
+        return
+
+    def process_chunk(self, _chunk: Any) -> bool:
+        return True
+
+    def process_synchronous_response(self, response: Any, log_prompt_and_response: bool, kwargs: Any) -> Optional[object]:  # noqa: ARG002
+        return None
+
 class PayiInstrumentConfig(TypedDict, total=False):
     proxy: bool
     global_instrumentation_enabled: bool
@@ -48,8 +63,6 @@ class _IsStreaming(Enum):
     kwargs = 2

 class _PayiInstrumentor:
-    estimated_prompt_tokens: str = "estimated_prompt_tokens"
-
     def __init__(
         self,
         payi: Optional[Payi],
@@ -484,9 +497,7 @@ class _PayiInstrumentor:
     async def achat_wrapper(
         self,
         category: str,
-
-        process_request: Optional[Callable[[IngestUnitsParams, Any, Any], None]],
-        process_synchronous_response: Any,
+        provider: _ProviderRequest,
         is_streaming: _IsStreaming,
         wrapped: Any,
         instance: Any,
@@ -511,8 +522,8 @@ class _PayiInstrumentor:

             return await wrapped(*args, **kwargs)

-
-
+        provider._ingest = {"category": category, "units": {}}  # type: ignore
+        provider._ingest["resource"] = kwargs.get("model", "")

         if category == PayiCategories.openai and instance and hasattr(instance, "_client"):
             from .OpenAIInstrumentor import OpenAiInstrumentor  # noqa: I001
@@ -530,21 +541,20 @@ class _PayiInstrumentor:
                 logging.error("Azure OpenAI invalid resource scope, not ingesting")
                 return wrapped(*args, **kwargs)

-
+            provider._ingest["resource_scope"] = resource_scope

             category = PayiCategories.azure_openai

-
-
+        provider._ingest["category"] = category
+        provider._ingest["resource"] = route_as_resource

         current_frame = inspect.currentframe()
         # f_back excludes the current frame, strip() cleans up whitespace and newlines
         stack = [frame.strip() for frame in traceback.format_stack(current_frame.f_back)]  # type: ignore

-
+        provider._ingest['properties'] = { 'system.stack_trace': json.dumps(stack) }

-
-        process_request(ingest, (), instance)
+        provider.process_request(kwargs)

         sw = Stopwatch()
         stream: bool = False
@@ -557,7 +567,7 @@ class _PayiInstrumentor:
             stream = False

         try:
-            self._prepare_ingest(
+            self._prepare_ingest(provider._ingest, extra_headers, **kwargs)
             sw.start()
             response = await wrapped(*args, **kwargs)
@@ -575,9 +585,8 @@ class _PayiInstrumentor:
                     instance=instance,
                     instrumentor=self,
                     log_prompt_and_response=self._log_prompt_and_response,
-                    ingest=ingest,
                     stopwatch=sw,
-
+                    provider=provider,
                     is_bedrock=False,
                 )
@@ -585,28 +594,25 @@ class _PayiInstrumentor:

         sw.stop()
         duration = sw.elapsed_ms_int()
-
-
+        provider._ingest["end_to_end_latency_ms"] = duration
+        provider._ingest["http_status_code"] = 200

-
-
-
-
-            log_prompt_and_response=self._log_prompt_and_response,
-            instrumentor=self)
-        if return_result:
-            return return_result
+        return_result: Any = provider.process_synchronous_response(
+            response=response,
+            log_prompt_and_response=self._log_prompt_and_response,
+            kwargs=kwargs)

-
+        if return_result:
+            return return_result
+
+        await self._aingest_units(provider._ingest)

         return response

     def chat_wrapper(
         self,
         category: str,
-
-        process_request: Optional[Callable[[IngestUnitsParams, Any, Any], None]],
-        process_synchronous_response: Any,
+        provider: _ProviderRequest,
         is_streaming: _IsStreaming,
         wrapped: Any,
         instance: Any,
@@ -635,13 +641,13 @@ class _PayiInstrumentor:

             return wrapped(*args, **kwargs)

-
+        provider._ingest = {"category": category, "units": {}}  # type: ignore
         if is_bedrock:
             # boto3 doesn't allow extra_headers
             kwargs.pop("extra_headers", None)
-
+            provider._ingest["resource"] = kwargs.get("modelId", "")
         else:
-
+            provider._ingest["resource"] = kwargs.get("model", "")

         if category == PayiCategories.openai and instance and hasattr(instance, "_client"):
             from .OpenAIInstrumentor import OpenAiInstrumentor  # noqa: I001
@@ -659,21 +665,20 @@ class _PayiInstrumentor:
                 logging.error("Azure OpenAI invalid resource scope, not ingesting")
                 return wrapped(*args, **kwargs)

-
+            provider._ingest["resource_scope"] = resource_scope

             category = PayiCategories.azure_openai

-
-
+        provider._ingest["category"] = category
+        provider._ingest["resource"] = route_as_resource

         current_frame = inspect.currentframe()
         # f_back excludes the current frame, strip() cleans up whitespace and newlines
         stack = [frame.strip() for frame in traceback.format_stack(current_frame.f_back)]  # type: ignore

-
+        provider._ingest['properties'] = { 'system.stack_trace': json.dumps(stack) }

-
-        process_request(ingest, (), kwargs)
+        provider.process_request(kwargs)

         sw = Stopwatch()
         stream: bool = False
@@ -686,7 +691,7 @@ class _PayiInstrumentor:
             stream = False

         try:
-            self._prepare_ingest(
+            self._prepare_ingest(provider._ingest, extra_headers, **kwargs)
             sw.start()
             response = wrapped(*args, **kwargs)
@@ -704,9 +709,8 @@ class _PayiInstrumentor:
                 instance=instance,
                 instrumentor=self,
                 log_prompt_and_response=self._log_prompt_and_response,
-                ingest=ingest,
                 stopwatch=sw,
-
+                provider=provider,
                 is_bedrock=is_bedrock,
             )
@@ -721,19 +725,17 @@ class _PayiInstrumentor:

         sw.stop()
         duration = sw.elapsed_ms_int()
-
-
+        provider._ingest["end_to_end_latency_ms"] = duration
+        provider._ingest["http_status_code"] = 200

-
-
-
-
-
-
-        if return_result:
-            return return_result
+        return_result: Any = provider.process_synchronous_response(
+            response=response,
+            log_prompt_and_response=self._log_prompt_and_response,
+            kwargs=kwargs)
+        if return_result:
+            return return_result

-        self._ingest_units(
+        self._ingest_units(provider._ingest)

         return response

@@ -808,13 +810,12 @@ class _PayiInstrumentor:
             extra_headers[PayiHeaderNames.experience_id] = context_experience_id

     @staticmethod
-    def update_for_vision(input: int, units: 'dict[str, Units]') -> int:
-        if
-
-            vision = input - prompt_token_estimate
+    def update_for_vision(input: int, units: 'dict[str, Units]', estimated_prompt_tokens: Optional[int]) -> int:
+        if estimated_prompt_tokens:
+            vision = input - estimated_prompt_tokens
             if (vision > 0):
                 units["vision"] = Units(input=vision, output=0)
-                input =
+                input = estimated_prompt_tokens

         return input

@@ -856,16 +857,15 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
         response: Any,
         instance: Any,
         instrumentor: _PayiInstrumentor,
-        ingest: IngestUnitsParams,
         stopwatch: Stopwatch,
-
+        provider: _ProviderRequest,
         log_prompt_and_response: bool = True,
         is_bedrock: bool = False,
     ) -> None:

         bedrock_from_stream: bool = False
         if is_bedrock:
-
+            provider._ingest["provider_response_id"] = response["ResponseMetadata"]["RequestId"]
             stream = response.get("stream", None)

             if stream:
@@ -882,11 +882,10 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore

         self._instrumentor = instrumentor
         self._stopwatch: Stopwatch = stopwatch
-        self._ingest: IngestUnitsParams = ingest
         self._log_prompt_and_response: bool = log_prompt_and_response
         self._responses: list[str] = []

-        self.
+        self._provider: _ProviderRequest = provider

         self._first_token: bool = True
         self._is_bedrock: bool = is_bedrock
@@ -906,7 +905,7 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore

     def __iter__(self) -> Any:
         if self._is_bedrock:
-            # MUST
+            # MUST reside in a separate function so that the yield statement (e.g. the generator) doesn't implicitly return its own iterator and overriding self
             return self._iter_bedrock()
         return self
@@ -935,7 +934,9 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
             self._stop_iteration()
             raise e
         else:
-            self._evaluate_chunk(chunk)
+            if self._evaluate_chunk(chunk) == False:
+                return self.__next__()
+
             return chunk

     async def __anext__(self) -> Any:
@@ -946,35 +947,35 @@ class ChatStreamWrapper(ObjectProxy):  # type: ignore
             await self._astop_iteration()
             raise e
         else:
-            self._evaluate_chunk(chunk)
+            if self._evaluate_chunk(chunk) == False:
+                return await self.__anext__()
             return chunk

-    def _evaluate_chunk(self, chunk: Any) ->
+    def _evaluate_chunk(self, chunk: Any) -> bool:
         if self._first_token:
-            self._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
+            self._provider._ingest["time_to_first_token_ms"] = self._stopwatch.elapsed_ms_int()
             self._first_token = False

         if self._log_prompt_and_response:
             self._responses.append(self.chunk_to_json(chunk))

-
-        self._process_chunk(chunk, self._ingest)
+        return self._provider.process_chunk(chunk)

     def _process_stop_iteration(self) -> None:
         self._stopwatch.stop()
-        self._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
-        self._ingest["http_status_code"] = 200
+        self._provider._ingest["end_to_end_latency_ms"] = self._stopwatch.elapsed_ms_int()
+        self._provider._ingest["http_status_code"] = 200

         if self._log_prompt_and_response:
-            self._ingest["provider_response_json"] = self._responses
+            self._provider._ingest["provider_response_json"] = self._responses

     async def _astop_iteration(self) -> None:
         self._process_stop_iteration()
-        await self._instrumentor._aingest_units(self._ingest)
+        await self._instrumentor._aingest_units(self._provider._ingest)

     def _stop_iteration(self) -> None:
         self._process_stop_iteration()
-        self._instrumentor._ingest_units(self._ingest)
+        self._instrumentor._ingest_units(self._provider._ingest)

     @staticmethod
     def chunk_to_json(chunk: Any) -> str:
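Two pieces anchor this file's refactor. First, the new _ProviderRequest base class is the per-call object: chat_wrapper/achat_wrapper attach _ingest to it, call provider.process_request(kwargs) before dispatch, and let process_chunk/process_synchronous_response fill in units, replacing the old loose callback parameters. Second, update_for_vision now receives the token estimate as a parameter instead of reading a sentinel "estimated_prompt_tokens" unit from the ingest dict. Its arithmetic, restated standalone with plain dicts (the 1200/200 figures are invented):

# Illustrative only: when a prompt contains images, the instrumentors pre-compute
# an estimated text-token count with tiktoken; any reported input tokens beyond
# that estimate are attributed to "vision" units, and "text" keeps the estimate.
from typing import Optional

def update_for_vision(input: int, units: "dict[str, dict]", estimated_prompt_tokens: Optional[int]) -> int:
    if estimated_prompt_tokens:
        vision = input - estimated_prompt_tokens
        if vision > 0:
            units["vision"] = {"input": vision, "output": 0}
            input = estimated_prompt_tokens
    return input

units: "dict[str, dict]" = {}
# e.g. the provider reports 1200 input tokens, tiktoken estimated 200 text tokens:
text_input = update_for_vision(1200, units, 200)
print(text_input, units)  # 200 {'vision': {'input': 1000, 'output': 0}}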
{payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/RECORD
CHANGED

@@ -11,7 +11,7 @@ payi/_resource.py,sha256=j2jIkTr8OIC8sU6-05nxSaCyj4MaFlbZrwlyg4_xJos,1088
 payi/_response.py,sha256=CfrNS_3wbL8o9dRyRVfZQ5E1GUlA4CUIUEK8olmfGqE,28777
 payi/_streaming.py,sha256=Z_wIyo206T6Jqh2rolFg2VXZgX24PahLmpURp0-NssU,10092
 payi/_types.py,sha256=2mbMK86K3W1aMTW7sOGQ-VND6-A2IuXKm8p4sYFztBU,6141
-payi/_version.py,sha256=
+payi/_version.py,sha256=DNP1TOymCfAVVUyTcBcwVSj1Nor6vREMNSwCUds3Phk,165
 payi/pagination.py,sha256=k2356QGPOUSjRF2vHpwLBdF6P-2vnQzFfRIJQAHGQ7A,1258
 payi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 payi/_utils/__init__.py,sha256=PNZ_QJuzZEgyYXqkO1HVhGkj5IU9bglVUcw7H-Knjzw,2062
@@ -24,12 +24,12 @@ payi/_utils/_transform.py,sha256=xfcRTFidCyPhQ7hXeivxpAS0x-NhTyr20iXm1cKcJYk,148
 payi/_utils/_typing.py,sha256=nTJz0jcrQbEgxwy4TtAkNxuU0QHHlmc6mQtA6vIR8tg,4501
 payi/_utils/_utils.py,sha256=8UmbPOy_AAr2uUjjFui-VZSrVBHRj6bfNEKRp5YZP2A,12004
 payi/lib/.keep,sha256=wuNrz-5SXo3jJaJOJgz4vFHM41YH_g20F5cRQo0vLes,224
-payi/lib/AnthropicInstrumentor.py,sha256=
-payi/lib/BedrockInstrumentor.py,sha256=
-payi/lib/OpenAIInstrumentor.py,sha256=
+payi/lib/AnthropicInstrumentor.py,sha256=OivTBeTnwVYDaaoGDj2V77JmjEb2aKQ6Se295VAm_gg,5615
+payi/lib/BedrockInstrumentor.py,sha256=CDfZ_IDEuQ2PrLcmtKGOX69sPhiKqKNIoZ2c0W0mIR8,10595
+payi/lib/OpenAIInstrumentor.py,sha256=_LV_IqWBAqvBlzbYQHolVmGioTn60-zUkNgp179rIFk,8305
 payi/lib/Stopwatch.py,sha256=7OJlxvr2Jyb6Zr1LYCYKczRB7rDVKkIR7gc4YoleNdE,764
 payi/lib/helpers.py,sha256=dEscgoiCneUx1rbgayt8P-s-xi0gKiN2vWiKYMS7oiQ,3830
-payi/lib/instrument.py,sha256=
+payi/lib/instrument.py,sha256=zIo8ZdU2qQchC_d48OcH_Df5tYTWI7JGVGm08p2Riak,43079
 payi/resources/__init__.py,sha256=1rtrPLWbNt8oJGOp6nwPumKLJ-ftez0B6qwLFyfcoP4,2972
 payi/resources/ingest.py,sha256=ifKMKylIkfCF-uGFPttr_VG3vWxsqntOOBrrU4_g1zk,21627
 payi/resources/categories/__init__.py,sha256=w5gMiPdBSzJA_qfoVtFBElaoe8wGf_O63R7R1Spr6Gk,1093
@@ -135,7 +135,7 @@ payi/types/use_cases/definitions/kpi_retrieve_response.py,sha256=uQXliSvS3k-yDYw
 payi/types/use_cases/definitions/kpi_update_params.py,sha256=jbawdWAdMnsTWVH0qfQGb8W7_TXe3lq4zjSRu44d8p8,373
 payi/types/use_cases/definitions/kpi_update_response.py,sha256=zLyEoT0S8d7XHsnXZYT8tM7yDw0Aze0Mk-_Z6QeMtc8,459
 payi/types/use_cases/definitions/limit_config_create_params.py,sha256=pzQza_16N3z8cFNEKr6gPbFvuGFrwNuGxAYb--Kbo2M,449
-payi-0.1.
-payi-0.1.
-payi-0.1.
-payi-0.1.
+payi-0.1.0a65.dist-info/METADATA,sha256=tYqX7J8pbMs3G74yZiX9T4cJ6wl3UQYn4OMCOUTnSBc,15290
+payi-0.1.0a65.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+payi-0.1.0a65.dist-info/licenses/LICENSE,sha256=CQt03aM-P4a3Yg5qBg3JSLVoQS3smMyvx7tYg_6V7Gk,11334
+payi-0.1.0a65.dist-info/RECORD,,
{payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/WHEEL
File without changes

{payi-0.1.0a64.dist-info → payi-0.1.0a65.dist-info}/licenses/LICENSE
File without changes