payi 0.1.0a110__py3-none-any.whl → 0.1.0a111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of payi might be problematic.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +67 -20
- payi/lib/BedrockInstrumentor.py +35 -1
- payi/lib/VertexRequest.py +3 -1
- payi/lib/data/cohere_embed_english_v3.json +30706 -0
- payi/lib/instrument.py +4 -2
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/METADATA +2 -1
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/RECORD +10 -9
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/WHEEL +0 -0
- {payi-0.1.0a110.dist-info → payi-0.1.0a111.dist-info}/licenses/LICENSE +0 -0
payi/_version.py
CHANGED

payi/lib/AnthropicInstrumentor.py
CHANGED

@@ -45,6 +45,18 @@ class AnthropicInstrumentor:
                 stream_messages_wrapper(instrumentor),
             )

+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "Messages.create",
+                messages_wrapper(instrumentor),
+            )
+
+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "Messages.stream",
+                stream_messages_wrapper(instrumentor),
+            )
+
             wrap_function_wrapper(
                 "anthropic.resources.messages",
                 "AsyncMessages.create",
@@ -57,6 +69,18 @@ class AnthropicInstrumentor:
                 astream_messages_wrapper(instrumentor),
             )

+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "AsyncMessages.create",
+                amessages_wrapper(instrumentor),
+            )
+
+            wrap_function_wrapper(
+                "anthropic.resources.beta.messages",
+                "AsyncMessages.stream",
+                astream_messages_wrapper(instrumentor),
+            )
+
         except Exception as e:
             instrumentor._logger.debug(f"Error instrumenting anthropic: {e}")
             return
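Note: the added calls use wrapt's wrap_function_wrapper to patch the new anthropic.resources.beta.messages module the same way the non-beta messages module was already patched. A minimal sketch of the pattern, assuming the anthropic package is installed; the real messages_wrapper factory lives in payi and is only stubbed here:

from wrapt import wrap_function_wrapper

def messages_wrapper(instrumentor):
    # Stub of payi's wrapper factory: the real one captures request metadata
    # before the call and token usage from the response afterwards.
    def wrapper(wrapped, instance, args, kwargs):
        response = wrapped(*args, **kwargs)  # invoke the original Messages.create
        return response
    return wrapper

# After this call, every beta Messages.create invocation is routed through wrapper().
wrap_function_wrapper(
    "anthropic.resources.beta.messages",
    "Messages.create",
    messages_wrapper(instrumentor=None),
)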
@@ -220,23 +244,52 @@ class _AnthropicProviderRequest(_ProviderRequest):

         return True

-def
-    usage = response['usage']
+def anthropic_process_compute_input_cost(request: _ProviderRequest, usage: 'dict[str, Any]') -> int:
     input = usage['input_tokens']
-    output = usage['output_tokens']
     units: dict[str, Units] = request._ingest["units"]

     cache_creation_input_tokens = usage.get("cache_creation_input_tokens", 0)
-
-
+    cache_read_input_tokens = usage.get("cache_read_input_tokens", 0)
+
+    total_input_tokens = input + cache_creation_input_tokens + cache_read_input_tokens
+
+    request._is_large_context = total_input_tokens > 200000
+    large_context = "_large_context" if request._is_large_context else ""
+
+    cache_creation: dict[str, int] = usage.get("cache_creation", {})
+    ephemeral_5m_input_tokens: Optional[int] = None
+    ephemeral_1h_input_tokens: Optional[int] = None
+    textCacheWriteAdded = False
+
+    if cache_creation:
+        ephemeral_5m_input_tokens = cache_creation.get("ephemeral_5m_input_tokens", 0)
+        if ephemeral_5m_input_tokens > 0:
+            textCacheWriteAdded = True
+            units["text_cache_write"+large_context] = Units(input=ephemeral_5m_input_tokens, output=0)
+
+        ephemeral_1h_input_tokens = cache_creation.get("ephemeral_1h_input_tokens", 0)
+        if ephemeral_1h_input_tokens > 0:
+            textCacheWriteAdded = True
+            units["text_cache_write_1h"+large_context] = Units(input=ephemeral_1h_input_tokens, output=0)
+
+    if textCacheWriteAdded is False and cache_creation_input_tokens > 0:
+        units["text_cache_write"+large_context] = Units(input=cache_creation_input_tokens, output=0)

     cache_read_input_tokens = usage.get("cache_read_input_tokens", 0)
     if cache_read_input_tokens > 0:
-        units["text_cache_read"] = Units(input=cache_read_input_tokens, output=0)
+        units["text_cache_read"+large_context] = Units(input=cache_read_input_tokens, output=0)

-
+    return _PayiInstrumentor.update_for_vision(input, units, request._estimated_prompt_tokens, is_large_context=request._is_large_context)

-
+def anthropic_process_synchronous_response(request: _ProviderRequest, response: 'dict[str, Any]', log_prompt_and_response: bool, assign_id: bool) -> Any:
+    usage = response['usage']
+    units: dict[str, Units] = request._ingest["units"]
+
+    input_tokens = anthropic_process_compute_input_cost(request, usage)
+    output = usage['output_tokens']
+
+    large_context = "_large_context" if request._is_large_context else ""
+    units["text"+large_context] = Units(input=input_tokens, output=output)

     content = response.get('content', [])
     if content:
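To make the new bucketing concrete: given a hypothetical Anthropic usage payload (values invented for illustration), anthropic_process_compute_input_cost would populate the following unit keys.

# Hypothetical usage block from an Anthropic response; numbers are made up.
usage = {
    "input_tokens": 1200,
    "cache_creation_input_tokens": 300,
    "cache_read_input_tokens": 500,
    "cache_creation": {
        "ephemeral_5m_input_tokens": 100,
        "ephemeral_1h_input_tokens": 200,
    },
}
# total_input_tokens = 1200 + 300 + 500 = 2000, so _is_large_context stays False
# units["text_cache_write"]    -> Units(input=100, output=0)   (5-minute cache writes)
# units["text_cache_write_1h"] -> Units(input=200, output=0)   (1-hour cache writes)
# units["text_cache_read"]     -> Units(input=500, output=0)
# The function returns the input count (after the update_for_vision adjustment),
# which the caller records under "text", or "text_large_context" above 200,000 tokens.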
@@ -277,31 +330,25 @@ def anthropic_process_chunk(request: _ProviderRequest, chunk: 'dict[str, Any]',
         usage = message['usage']
         units = request._ingest["units"]

-        input =
-
-        units["text"] = Units(input=input, output=0)
-
-        text_cache_write: int = usage.get("cache_creation_input_tokens", 0)
-        if text_cache_write > 0:
-            units["text_cache_write"] = Units(input=text_cache_write, output=0)
+        input = anthropic_process_compute_input_cost(request, usage)

-
-
-            units["text_cache_read"] = Units(input=text_cache_read, output=0)
+        large_context = "_large_context" if request._is_large_context else ""
+        units["text"+large_context] = Units(input=input, output=0)

         request._instrumentor._logger.debug(f"Anthropic streaming captured {input} input tokens, ")

     elif type == "message_delta":
         usage = chunk.get('usage', {})
         ingest = True
+        large_context = "_large_context" if request._is_large_context else ""

         # Web search will return an updated input tokens value at the end of streaming
         input_tokens = usage.get('input_tokens', None)
         if input_tokens is not None:
             request._instrumentor._logger.debug(f"Anthropic streaming finished, updated input tokens: {input_tokens}")
-            request._ingest["units"]["text"]["input"] = input_tokens
+            request._ingest["units"]["text"+large_context]["input"] = input_tokens

-        request._ingest["units"]["text"]["output"] = usage.get('output_tokens', 0)
+        request._ingest["units"]["text"+large_context]["output"] = usage.get('output_tokens', 0)

         request._instrumentor._logger.debug(f"Anthropic streaming finished: output tokens {usage.get('output_tokens', 0)} ")

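For reference, the message_delta branch above now targets the large-context key when it applies; a hypothetical final streaming chunk (field names follow Anthropic's message_delta event) would be handled like this:

# Hypothetical final chunk of a streamed Anthropic response.
chunk = {
    "type": "message_delta",
    "usage": {"input_tokens": 2048, "output_tokens": 512},
}
# If _is_large_context was set while handling message_start, the totals are written
# to units["text_large_context"]; otherwise they go to units["text"] as before.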
payi/lib/BedrockInstrumentor.py
CHANGED

@@ -5,6 +5,7 @@ from functools import wraps
 from typing_extensions import override

 from wrapt import ObjectProxy, wrap_function_wrapper # type: ignore
+from tokenizers import Tokenizer # type: ignore

 from payi.lib.helpers import PayiCategories, PayiHeaderNames, payi_aws_bedrock_url
 from payi.types.ingest_units_params import Units
@@ -102,6 +103,8 @@ def _redirect_to_payi(request: Any, event_name: str, **_: 'dict[str, Any]') -> None:


 class InvokeResponseWrapper(ObjectProxy): # type: ignore
+    _cohere_embed_english_v3_tokenizer: Optional[Tokenizer] = None
+
     def __init__(
         self,
         response: Any,
@@ -160,6 +163,25 @@ class InvokeResponseWrapper(ObjectProxy): # type: ignore

             bedrock_converse_process_synchronous_function_call(self._request, response)

+        elif self._request._is_amazon_titan_embed_text_v1:
+            input = response.get('inputTextTokenCount', 0)
+            units["text"] = Units(input=input, output=0)
+
+        elif self._request._is_cohere_embed_english_v3:
+            texts: list[str] = response.get("texts", [])
+            if texts and len(texts) > 0:
+                text = " ".join(texts)
+
+                if self._cohere_embed_english_v3_tokenizer is None:
+                    current_dir = os.path.dirname(os.path.abspath(__file__))
+                    tokenizer_path = os.path.join(current_dir, "data", "cohere_embed_english_v3.json")
+                    self._cohere_embed_english_v3_tokenizer = Tokenizer.from_file(tokenizer_path) # type: ignore
+
+                tokens: list = self._cohere_embed_english_v3_tokenizer.encode(text, add_special_tokens=False).tokens # type: ignore
+
+                if tokens and isinstance(tokens, list):
+                    units["text"] = Units(input=len(tokens), output=0) # type: ignore
+
         if self._log_prompt_and_response:
             ingest["provider_response_json"] = data.decode('utf-8') # type: ignore

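A standalone sketch of the Cohere token-counting step shown above, using the Hugging Face tokenizers package and the cohere_embed_english_v3.json vocabulary this release bundles under payi/lib/data/ (the sample texts and relative path are invented for the example):

import os
from tokenizers import Tokenizer

# Assumes the bundled vocabulary file is available at this relative path.
tokenizer_path = os.path.join("payi", "lib", "data", "cohere_embed_english_v3.json")
tokenizer = Tokenizer.from_file(tokenizer_path)

texts = ["first passage to embed", "second passage"]  # e.g. the "texts" echoed in the response
encoding = tokenizer.encode(" ".join(texts), add_special_tokens=False)
input_tokens = len(encoding.tokens)  # reported as the "text" unit input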
@@ -287,6 +309,8 @@ class _BedrockInvokeProviderRequest(_BedrockProviderRequest):
         self._is_anthropic: bool = 'anthropic' in model_id
         self._is_nova: bool = 'nova' in model_id
         self._is_meta: bool = 'meta' in model_id
+        self._is_amazon_titan_embed_text_v1: bool = 'amazon.titan-embed-text-v1' == model_id
+        self._is_cohere_embed_english_v3: bool = 'cohere.embed-english-v3' == model_id

     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -302,7 +326,17 @@ class _BedrockInvokeProviderRequest(_BedrockProviderRequest):
                 anthropic_has_image_and_get_texts(self, messages)
             except Exception as e:
                 self._instrumentor._logger.debug(f"Bedrock invoke error processing request body: {e}")
-
+        elif self._is_cohere_embed_english_v3:
+            try:
+                body = json.loads( kwargs.get("body", ""))
+                input_type = body.get("input_type", "")
+                if input_type == 'image':
+                    images = body.get("images", [])
+                    if (len(images) > 0):
+                        # only supports one image according to docs
+                        self._ingest["units"]["vision"] = Units(input=1, output=0)
+            except Exception as e:
+                self._instrumentor._logger.debug(f"Bedrock invoke error processing request body: {e}")
         return True

     @override
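Because Cohere's Bedrock embed API accepts a single image per request (per the comment in the diff), the request-side check only needs to record one vision unit. A hypothetical invoke_model body that would trigger it (field names follow the request format parsed above; the data URI is truncated):

import json

# Hypothetical cohere.embed-english-v3 request body.
body = json.dumps({
    "input_type": "image",
    "images": ["data:image/png;base64,...."],
})
# process_request() parses this body and sets units["vision"] = Units(input=1, output=0).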
payi/lib/VertexRequest.py
CHANGED

@@ -148,7 +148,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
     return model.startswith("gemini-1.")

 def is_large_context_token_model(model: str, input_tokens: int) -> bool:
-    return model.startswith("gemini-2.5-pro") and input_tokens >
+    return model.startswith("gemini-2.5-pro") and input_tokens > 200000

 def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
     if key not in request._ingest["units"]:
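With this change, only gemini-2.5-pro calls with more than 200,000 input tokens are treated as large-context; illustrative calls to the function shown above:

is_large_context_token_model("gemini-2.5-pro", 250_000)    # True  -> "_large_context" unit keys
is_large_context_token_model("gemini-2.5-pro", 150_000)    # False
is_large_context_token_model("gemini-2.0-flash", 250_000)  # False (model not eligible)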
@@ -172,6 +172,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore

         if is_character_billing_model(model):
             if input > 128000:
+                self._is_large_context = True
                 large_context = "_large_context"

             # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
@@ -222,6 +223,7 @@ class _VertexRequest(_ProviderRequest): # type: ignore
         thinking_token_count = usage.get("thoughts_token_count", 0)

         if is_large_context_token_model(model, input):
+            self._is_large_context = True
             large_context = "_large_context"

         cache_details: dict[str, int] = {}