payi 0.1.0a82__py3-none-any.whl → 0.1.0a84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- payi/_version.py +1 -1
- payi/lib/AnthropicInstrumentor.py +92 -62
- payi/lib/BedrockInstrumentor.py +95 -108
- payi/lib/GoogleGenAiInstrumentor.py +31 -115
- payi/lib/OpenAIInstrumentor.py +13 -9
- payi/lib/VertexInstrumentor.py +168 -111
- payi/lib/instrument.py +135 -73
- payi/resources/categories/__init__.py +14 -0
- payi/resources/categories/categories.py +32 -0
- payi/resources/categories/fixed_cost_resources.py +196 -0
- payi/resources/ingest.py +14 -0
- payi/resources/limits/limits.py +4 -0
- payi/types/categories/__init__.py +1 -0
- payi/types/categories/fixed_cost_resource_create_params.py +21 -0
- payi/types/ingest_event_param.py +13 -1
- payi/types/ingest_units_params.py +11 -1
- payi/types/limit_create_params.py +2 -0
- payi/types/limit_history_response.py +3 -3
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/METADATA +1 -1
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/RECORD +22 -20
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/WHEEL +0 -0
- {payi-0.1.0a82.dist-info → payi-0.1.0a84.dist-info}/licenses/LICENSE +0 -0
payi/lib/GoogleGenAiInstrumentor.py
CHANGED
@@ -1,14 +1,12 @@
 import json
-import math
 from typing import Any, List, Union, Optional, Sequence
 from typing_extensions import override
 
 from wrapt import wrap_function_wrapper # type: ignore
 
 from payi.lib.helpers import PayiCategories
-from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class GoogleGenAiInstrumentor:
@@ -115,9 +113,6 @@ async def agenerate_stream_wrapper(
         kwargs,
     )
 
-def count_chars_skip_spaces(text: str) -> int:
-    return sum(1 for c in text if not c.isspace())
-
 class _GoogleGenAiRequest(_ProviderRequest):
     def __init__(self, instrumentor: _PayiInstrumentor):
         super().__init__(
@@ -126,7 +121,7 @@ class _GoogleGenAiRequest(_ProviderRequest):
             streaming_type=_StreamingType.generator,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -158,6 +153,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if isinstance(value, list):
             items = value # type: ignore
 
+            from .VertexInstrumentor import count_chars_skip_spaces
+
             for item in items: # type: ignore
                 text = ""
                 if isinstance(item, Part):
@@ -248,7 +245,10 @@ class _GoogleGenAiRequest(_ProviderRequest):
            prompt["tool_config"] = tool_config
 
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        from .VertexInstrumentor import vertex_compute_usage, count_chars_skip_spaces
+
+        ingest = False
         response_dict: dict[str, Any] = chunk.to_json_dict()
         if "provider_response_id" not in self._ingest:
             id = response_dict.get("response_id", None)
@@ -259,20 +259,24 @@ class _GoogleGenAiRequest(_ProviderRequest):
 
                 self._ingest["resource"] = "google." + model
 
+
        for candidate in response_dict.get("candidates", []):
            parts = candidate.get("content", {}).get("parts", [])
            for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
 
        usage = response_dict.get("usage_metadata", {})
        if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
-
-
-
-
-
-
+            vertex_compute_usage(
+                request=self,
+                model=model,
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count
+            )
+            ingest = True
+
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_synchronous_response(
@@ -282,6 +286,8 @@ class _GoogleGenAiRequest(_ProviderRequest):
         kwargs: Any) -> Any:
         response_dict = response.to_json_dict()
 
+        from .VertexInstrumentor import vertex_compute_usage
+
         id: Optional[str] = response_dict.get("response_id", None)
         if id:
             self._ingest["provider_response_id"] = id
@@ -290,105 +296,15 @@ class _GoogleGenAiRequest(_ProviderRequest):
         if model:
             self._ingest["resource"] = "google." + model
 
-
-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
+
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]
 
-        return None
-
-    def add_units(self, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
-        if key not in self._ingest["units"]:
-            self._ingest["units"][key] = {}
-        if input is not None:
-            self._ingest["units"][key]["input"] = input
-        if output is not None:
-            self._ingest["units"][key]["output"] = output
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
-
-        prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
-        candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
-
-        model: str = response_dict.get("model_version", "")
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
-
-        else:
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "IMAGE":
-                    self.add_units("vision", input=modality_token_count)
-                elif modality in ("VIDEO", "AUDIO", "TEXT"):
-                    self.add_units(modality.lower(), input=modality_token_count)
-            for details in candidates_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
-                    self.add_units(modality.lower(), output=modality_token_count)
-
-        if not self._ingest["units"]:
-            input = usage.get("prompt_token_count", 0)
-            output = usage.get("candidates_token_count", 0) * 4
-
-            if self._is_character_billing_model(model):
-                if self._prompt_character_count > 0:
-                    input = self._prompt_character_count
-                else:
-                    input *= 4
-
-                # if no units were added, add a default unit and assume 4 characters per token
-                self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-            else:
-                # if no units were added, add a default unit
-                self._ingest["units"]["text"] = Units(input=input, output=output)
+        return None
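Across all three instrumentors in this release, process_chunk now returns a _ChunkResult instead of a bare value, separating "forward this chunk to the caller" from "enough usage data has arrived to ingest". _ChunkResult itself is defined in payi/lib/instrument.py (+135 -73 above), which this diff does not display; the following dataclass is a hypothetical sketch implied by the call sites, not the package's actual definition:

    from dataclasses import dataclass

    # Hypothetical sketch reconstructed from call sites such as
    # _ChunkResult(send_chunk_to_caller=True, ingest=ingest); the real class
    # in payi/lib/instrument.py may differ or carry additional fields.
    @dataclass
    class _ChunkResult:
        send_chunk_to_caller: bool  # forward this streaming chunk to the application
        ingest: bool                # usage is complete; the request can be ingested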
payi/lib/OpenAIInstrumentor.py
CHANGED
@@ -9,7 +9,7 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories, PayiHeaderNames
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class OpenAiInstrumentor:
@@ -22,8 +22,6 @@ class OpenAiInstrumentor:
     @staticmethod
     def instrument(instrumentor: _PayiInstrumentor) -> None:
         try:
-            from openai import OpenAI # type: ignore # noqa: F401 I001
-
             wrap_function_wrapper(
                 "openai.resources.chat.completions",
                 "Completions.create",
@@ -47,7 +45,11 @@ class OpenAiInstrumentor:
                 "AsyncEmbeddings.create",
                 aembeddings_wrapper(instrumentor),
             )
+        except Exception as e:
+            instrumentor._logger.debug(f"Error instrumenting openai: {e}")
 
+        # responses separately as they are relatively new and the client may not be using the latest openai module
+        try:
             wrap_function_wrapper(
                 "openai.resources.responses",
                 "Responses.create",
@@ -62,8 +64,6 @@ class OpenAiInstrumentor:
 
         except Exception as e:
             instrumentor._logger.debug(f"Error instrumenting openai: {e}")
-            return
-
 
 @_PayiInstrumentor.payi_wrapper
 def embeddings_wrapper(
@@ -338,7 +338,8 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
         self._include_usage_added = False
 
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         model = model_to_dict(chunk)
 
         if "provider_response_id" not in self._ingest:
@@ -356,8 +357,9 @@ class _OpenAiChatProviderRequest(_OpenAiProviderRequest):
             # packet which contains the usage to the client as they are not expecting the data
             if self._include_usage_added:
                 send_chunk_to_client = False
+            ingest = True
 
-        return send_chunk_to_client
+        return _ChunkResult(send_chunk_to_caller=send_chunk_to_client, ingest=ingest)
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
@@ -420,7 +422,8 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
             input_tokens_details_key=_OpenAiProviderRequest.responses_input_tokens_details_key)
 
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         model = model_to_dict(chunk)
         response: dict[str, Any] = model.get("response", {})
 
@@ -432,8 +435,9 @@ class _OpenAiResponsesProviderRequest(_OpenAiProviderRequest):
             usage = response.get("usage")
             if usage:
                 self.add_usage_units(usage)
+                ingest = True
 
-        return True
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
payi/lib/VertexInstrumentor.py
CHANGED
@@ -8,33 +8,37 @@ from wrapt import wrap_function_wrapper # type: ignore
 from payi.lib.helpers import PayiCategories
 from payi.types.ingest_units_params import Units
 
-from .instrument import _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
+from .instrument import _ChunkResult, _IsStreaming, _StreamingType, _ProviderRequest, _PayiInstrumentor
 
 
 class VertexInstrumentor:
     @staticmethod
     def instrument(instrumentor: _PayiInstrumentor) -> None:
         try:
-            import vertexai # type: ignore # noqa: F401 I001
-
             wrap_function_wrapper(
                 "vertexai.generative_models",
                 "GenerativeModel.generate_content",
                 generate_wrapper(instrumentor),
             )
 
-            wrap_function_wrapper(
-                "vertexai.preview.generative_models",
-                "GenerativeModel.generate_content",
-                generate_wrapper(instrumentor),
-            )
-
             wrap_function_wrapper(
                 "vertexai.generative_models",
                 "GenerativeModel.generate_content_async",
                 agenerate_wrapper(instrumentor),
             )
 
+        except Exception as e:
+            instrumentor._logger.debug(f"Error instrumenting vertex: {e}")
+            return
+
+        # separate instrumenting of preview functionality from released in case it fails
+        try:
+            wrap_function_wrapper(
+                "vertexai.preview.generative_models",
+                "GenerativeModel.generate_content",
+                generate_wrapper(instrumentor),
+            )
+
             wrap_function_wrapper(
                 "vertexai.preview.generative_models",
                 "GenerativeModel.generate_content_async",
@@ -92,12 +96,20 @@ class _GoogleVertexRequest(_ProviderRequest):
             streaming_type=_StreamingType.generator,
         )
         self._prompt_character_count = 0
-        self.
+        self._candidates_character_count = 0
+        self._model_name: Optional[str] = None
 
     @override
     def process_request(self, instance: Any, extra_headers: 'dict[str, str]', args: Sequence[Any], kwargs: Any) -> bool:
         from vertexai.generative_models import Content, Image, Part # type: ignore # noqa: F401 I001
 
+        # Try to extract the model name as a backup if the response does not provide it (older vertexai versions do not)
+        if instance and hasattr(instance, "_model_name"):
+            model = instance._model_name
+            if model and isinstance(model, str):
+                # Extract the model name after the last slash
+                self._model_name = model.split('/')[-1]
+
         if not args:
             return True
 
@@ -191,32 +203,44 @@ class _GoogleVertexRequest(_ProviderRequest):
             # tool_config does not have to_dict or any other serializable object
             prompt["tool_config"] = str(tool_config) # type: ignore
 
+    def _get_model_name(self, response: 'dict[str, Any]') -> Optional[str]:
+        model: Optional[str] = response.get("model_version", None)
+        if model:
+            return model
+
+        return self._model_name
+
     @override
-    def process_chunk(self, chunk: Any) ->
+    def process_chunk(self, chunk: Any) -> _ChunkResult:
+        ingest = False
         response_dict: dict[str, Any] = chunk.to_dict()
         if "provider_response_id" not in self._ingest:
             id = response_dict.get("response_id", None)
             if id:
                 self._ingest["provider_response_id"] = id
 
-
-
-
+        if "resource" not in self._ingest:
+            model: Optional[str] = self._get_model_name(response_dict) # type: ignore[unreachable]
+            if model:
+                self._ingest["resource"] = "google." + model
 
         for candidate in response_dict.get("candidates", []):
             parts = candidate.get("content", {}).get("parts", [])
             for part in parts:
-                self.
+                self._candidates_character_count += count_chars_skip_spaces(part.get("text", ""))
 
         usage = response_dict.get("usage_metadata", {})
         if usage and "prompt_token_count" in usage and "candidates_token_count" in usage:
-
+            vertex_compute_usage(
+                request=self,
+                model=self._get_model_name(response_dict),
+                response_dict=response_dict,
+                prompt_character_count=self._prompt_character_count,
+                streaming_candidates_characters=self._candidates_character_count,
+            )
+            ingest = True
 
-        return True
-
-    @staticmethod
-    def _is_character_billing_model(model: str) -> bool:
-        return model.startswith("gemini-1.")
+        return _ChunkResult(send_chunk_to_caller=True, ingest=ingest)
 
     @override
     def process_synchronous_response(
@@ -230,110 +254,143 @@ class _GoogleVertexRequest(_ProviderRequest):
         if id:
             self._ingest["provider_response_id"] = id
 
-        model: Optional[str] =
+        model: Optional[str] = self._get_model_name(response_dict)
         if model:
             self._ingest["resource"] = "google." + model
 
-
+        vertex_compute_usage(
+            request=self,
+            model=model,
+            response_dict=response_dict,
+            prompt_character_count=self._prompt_character_count,
+            streaming_candidates_characters=self._candidates_character_count
+        )
 
         if log_prompt_and_response:
             self._ingest["provider_response_json"] = [json.dumps(response_dict)]
 
         return None
 
-
-
-
+def vertex_compute_usage(
+    request: _ProviderRequest,
+    model: Optional[str],
+    response_dict: 'dict[str, Any]',
+    prompt_character_count: int = 0,
+    streaming_candidates_characters: Optional[int] = None) -> None:
+
+    def is_character_billing_model(model: str) -> bool:
+        return model.startswith("gemini-1.")
+
+    def is_large_context_token_model(model: str, input_tokens: int) -> bool:
+        return model.startswith("gemini-2.5-pro") and input_tokens > 200_000
+
+    def add_units(request: _ProviderRequest, key: str, input: Optional[int] = None, output: Optional[int] = None) -> None:
+        if key not in request._ingest["units"]:
+            request._ingest["units"][key] = {}
         if input is not None:
-
+            request._ingest["units"][key]["input"] = input
         if output is not None:
-
-
-    def _compute_usage(self, response_dict: 'dict[str, Any]', streaming_candidates_characters: Optional[int] = None) -> None:
-        usage = response_dict.get("usage_metadata", {})
-        input = usage.get("prompt_token_count", 0)
+            request._ingest["units"][key]["output"] = output
 
-
-
+    usage = response_dict.get("usage_metadata", {})
+    input = usage.get("prompt_token_count", 0)
 
-
-
-        # for character billing only
-        large_context = "" if input < 128000 else "_large_context"
-
-        if self._is_character_billing_model(model):
-            # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
-            for details in prompt_tokens_details:
-                modality = details.get("modality", "")
-                if not modality:
-                    continue
-
-                modality_token_count = details.get("token_count", 0)
-                if modality == "TEXT":
-                    input = self._prompt_character_count
-                    if input == 0:
-                        # back up calc if nothing was calculated from the prompt
-                        input = response_dict["usage_metadata"]["prompt_token_count"] * 4
-
-                    output = 0
-                    if streaming_candidates_characters is None:
-                        for candidate in response_dict.get("candidates", []):
-                            parts = candidate.get("content", {}).get("parts", [])
-                            for part in parts:
-                                output += count_chars_skip_spaces(part.get("text", ""))
-
-                        if output == 0:
-                            # back up calc if no parts
-                            output = response_dict["usage_metadata"]["candidates_token_count"] * 4
-                    else:
-                        output = streaming_candidates_characters
-
-                    self._ingest["units"]["text"+large_context] = Units(input=input, output=output)
-
-                elif modality == "IMAGE":
-                    num_images = math.ceil(modality_token_count / 258)
-                    self.add_units("vision"+large_context, input=num_images)
-
-                elif modality == "VIDEO":
-                    video_seconds = math.ceil(modality_token_count / 285)
-                    self.add_units("video"+large_context, input=video_seconds)
-
-                elif modality == "AUDIO":
-                    audio_seconds = math.ceil(modality_token_count / 25)
-                    self.add_units("audio"+large_context, input=audio_seconds)
+    prompt_tokens_details: list[dict[str, Any]] = usage.get("prompt_tokens_details", [])
+    candidates_tokens_details: list[dict[str, Any]] = usage.get("candidates_tokens_details", [])
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if not model:
+        model = ""
+
+    large_context = ""
+
+    if is_character_billing_model(model):
+        if input > 128000:
+            large_context = "_large_context"
+
+        # gemini 1.0 and 1.5 units are reported in characters, per second, per image, etc...
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "TEXT":
+                input = prompt_character_count
+                if input == 0:
+                    # back up calc if nothing was calculated from the prompt
+                    input = response_dict["usage_metadata"]["prompt_token_count"] * 4
+
+                output = 0
+                if streaming_candidates_characters is None:
+                    for candidate in response_dict.get("candidates", []):
+                        parts = candidate.get("content", {}).get("parts", [])
+                        for part in parts:
+                            output += count_chars_skip_spaces(part.get("text", ""))
+
+                    if output == 0:
+                        # back up calc if no parts
+                        output = response_dict["usage_metadata"]["candidates_token_count"] * 4
                 else:
-
+                    output = streaming_candidates_characters
+
+                request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+
+            elif modality == "IMAGE":
+                num_images = math.ceil(modality_token_count / 258)
+                add_units(request, "vision"+large_context, input=num_images)
 
-
-
+            elif modality == "VIDEO":
+                video_seconds = math.ceil(modality_token_count / 285)
+                add_units(request, "video"+large_context, input=video_seconds)
+
+            elif modality == "AUDIO":
+                audio_seconds = math.ceil(modality_token_count / 25)
+                add_units(request, "audio"+large_context, input=audio_seconds)
+
+        # No need to go over the candidates_tokens_details as all the character based 1.x models only output TEXT
+        # for details in candidates_tokens_details:
+
+    else:
+        # thinking tokens introduced in 2.5 after the transition to token based billing
+        thinking_token_count = usage.get("thoughts_token_count", 0)
+
+        if is_large_context_token_model(model, input):
+            large_context = "_large_context"
+
+        for details in prompt_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality == "IMAGE":
+                add_units(request, "vision"+large_context, input=modality_token_count)
+            elif modality in ("VIDEO", "AUDIO", "TEXT"):
+                add_units(request, modality.lower()+large_context, input=modality_token_count)
+        for details in candidates_tokens_details:
+            modality = details.get("modality", "")
+            if not modality:
+                continue
+
+            modality_token_count = details.get("token_count", 0)
+            if modality in ("VIDEO", "AUDIO", "TEXT", "IMAGE"):
+                add_units(request, modality.lower()+large_context, output=modality_token_count)
+
+        if thinking_token_count > 0:
+            add_units(request, "reasoning"+large_context, output=thinking_token_count)
+
+    if not request._ingest["units"]:
+        input = usage.get("prompt_token_count", 0)
+        output = usage.get("candidates_token_count", 0) * 4
+
+        if is_character_billing_model(model):
+            if prompt_character_count > 0:
+                input = prompt_character_count
            else:
-
-
+                input *= 4
+
+            # if no units were added, add a default unit and assume 4 characters per token
+            request._ingest["units"]["text"+large_context] = Units(input=input, output=output)
+        else:
+            # if no units were added, add a default unit
+            request._ingest["units"]["text"] = Units(input=input, output=output)