judgeval 0.20.1__tar.gz → 0.21.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval might be problematic. Click here for more details.

Files changed (167) hide show
  1. {judgeval-0.20.1 → judgeval-0.21.0}/PKG-INFO +1 -1
  2. {judgeval-0.20.1 → judgeval-0.21.0}/pyproject.toml +1 -1
  3. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/keys.py +10 -9
  4. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/messages.py +34 -22
  5. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/messages_stream.py +12 -12
  6. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/generate_content.py +8 -6
  7. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/beta_chat_completions.py +36 -12
  8. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/chat_completions.py +75 -22
  9. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/responses.py +77 -22
  10. judgeval-0.21.0/src/judgeval/tracer/llm/llm_openai/utils.py +22 -0
  11. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/chat_completions.py +22 -14
  12. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/serialize.py +2 -2
  13. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/version.py +1 -1
  14. {judgeval-0.20.1 → judgeval-0.21.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  15. {judgeval-0.20.1 → judgeval-0.21.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  16. {judgeval-0.20.1 → judgeval-0.21.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  17. {judgeval-0.20.1 → judgeval-0.21.0}/.github/pull_request_template.md +0 -0
  18. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/blocked-pr.yaml +0 -0
  19. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/ci.yaml +0 -0
  20. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/claude-code-review.yml +0 -0
  21. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/claude.yml +0 -0
  22. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/lint.yaml +0 -0
  23. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/merge-branch-check.yaml +0 -0
  24. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/mypy.yaml +0 -0
  25. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/pre-commit-autoupdate.yaml +0 -0
  26. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/release.yaml +0 -0
  27. {judgeval-0.20.1 → judgeval-0.21.0}/.github/workflows/validate-branch.yaml +0 -0
  28. {judgeval-0.20.1 → judgeval-0.21.0}/.gitignore +0 -0
  29. {judgeval-0.20.1 → judgeval-0.21.0}/.pre-commit-config.yaml +0 -0
  30. {judgeval-0.20.1 → judgeval-0.21.0}/CONTRIBUTING.md +0 -0
  31. {judgeval-0.20.1 → judgeval-0.21.0}/LICENSE.md +0 -0
  32. {judgeval-0.20.1 → judgeval-0.21.0}/README.md +0 -0
  33. {judgeval-0.20.1 → judgeval-0.21.0}/assets/Screenshot 2025-05-17 at 8.14.27 PM.png +0 -0
  34. {judgeval-0.20.1 → judgeval-0.21.0}/assets/agent.gif +0 -0
  35. {judgeval-0.20.1 → judgeval-0.21.0}/assets/agent_trace_example.png +0 -0
  36. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/company.jpg +0 -0
  37. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/company_banner.jpg +0 -0
  38. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/darkmode.svg +0 -0
  39. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/full_logo.png +0 -0
  40. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/icon.png +0 -0
  41. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/lightmode.svg +0 -0
  42. {judgeval-0.20.1 → judgeval-0.21.0}/assets/brand/white_background.png +0 -0
  43. {judgeval-0.20.1 → judgeval-0.21.0}/assets/custom_scorer_online_abm.png +0 -0
  44. {judgeval-0.20.1 → judgeval-0.21.0}/assets/data.gif +0 -0
  45. {judgeval-0.20.1 → judgeval-0.21.0}/assets/dataset_clustering_screenshot.png +0 -0
  46. {judgeval-0.20.1 → judgeval-0.21.0}/assets/dataset_clustering_screenshot_dm.png +0 -0
  47. {judgeval-0.20.1 → judgeval-0.21.0}/assets/datasets_preview_screenshot.png +0 -0
  48. {judgeval-0.20.1 → judgeval-0.21.0}/assets/document.gif +0 -0
  49. {judgeval-0.20.1 → judgeval-0.21.0}/assets/error_analysis_dashboard.png +0 -0
  50. {judgeval-0.20.1 → judgeval-0.21.0}/assets/errors.png +0 -0
  51. {judgeval-0.20.1 → judgeval-0.21.0}/assets/experiments_dashboard_screenshot.png +0 -0
  52. {judgeval-0.20.1 → judgeval-0.21.0}/assets/experiments_page.png +0 -0
  53. {judgeval-0.20.1 → judgeval-0.21.0}/assets/experiments_pagev2.png +0 -0
  54. {judgeval-0.20.1 → judgeval-0.21.0}/assets/logo_darkmode.svg +0 -0
  55. {judgeval-0.20.1 → judgeval-0.21.0}/assets/logo_lightmode.svg +0 -0
  56. {judgeval-0.20.1 → judgeval-0.21.0}/assets/monitoring_screenshot.png +0 -0
  57. {judgeval-0.20.1 → judgeval-0.21.0}/assets/online_eval.png +0 -0
  58. {judgeval-0.20.1 → judgeval-0.21.0}/assets/product_shot.png +0 -0
  59. {judgeval-0.20.1 → judgeval-0.21.0}/assets/quickstart_trajectory_ss.png +0 -0
  60. {judgeval-0.20.1 → judgeval-0.21.0}/assets/test.png +0 -0
  61. {judgeval-0.20.1 → judgeval-0.21.0}/assets/tests.png +0 -0
  62. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace.gif +0 -0
  63. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace_demo.png +0 -0
  64. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace_screenshot.png +0 -0
  65. {judgeval-0.20.1 → judgeval-0.21.0}/assets/trace_screenshot_old.png +0 -0
  66. {judgeval-0.20.1 → judgeval-0.21.0}/pytest.ini +0 -0
  67. {judgeval-0.20.1 → judgeval-0.21.0}/scripts/api_generator.py +0 -0
  68. {judgeval-0.20.1 → judgeval-0.21.0}/scripts/openapi_transform.py +0 -0
  69. {judgeval-0.20.1 → judgeval-0.21.0}/scripts/update_types.sh +0 -0
  70. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/__init__.py +0 -0
  71. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/api/__init__.py +0 -0
  72. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/api/api_types.py +0 -0
  73. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/cli.py +0 -0
  74. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/constants.py +0 -0
  75. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/__init__.py +0 -0
  76. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/evaluation_run.py +0 -0
  77. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/example.py +0 -0
  78. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/judgment_types.py +0 -0
  79. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/result.py +0 -0
  80. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/scorer_data.py +0 -0
  81. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/scripts/fix_default_factory.py +0 -0
  82. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/scripts/openapi_transform.py +0 -0
  83. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/data/trace.py +0 -0
  84. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/dataset/__init__.py +0 -0
  85. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/env.py +0 -0
  86. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/evaluation/__init__.py +0 -0
  87. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/exceptions.py +0 -0
  88. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/integrations/langgraph/__init__.py +0 -0
  89. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/integrations/openlit/__init__.py +0 -0
  90. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/__init__.py +0 -0
  91. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/base_judge.py +0 -0
  92. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/litellm_judge.py +0 -0
  93. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/together_judge.py +0 -0
  94. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/judges/utils.py +0 -0
  95. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/logger.py +0 -0
  96. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/prompt/__init__.py +0 -0
  97. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/__init__.py +0 -0
  98. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/agent_scorer.py +0 -0
  99. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/api_scorer.py +0 -0
  100. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/base_scorer.py +0 -0
  101. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/example_scorer.py +0 -0
  102. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/exceptions.py +0 -0
  103. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/__init__.py +0 -0
  104. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +0 -0
  105. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +0 -0
  106. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +0 -0
  107. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +0 -0
  108. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +0 -0
  109. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +0 -0
  110. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/score.py +0 -0
  111. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/scorers/utils.py +0 -0
  112. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/__init__.py +0 -0
  113. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/constants.py +0 -0
  114. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/__init__.py +0 -0
  115. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/s3.py +0 -0
  116. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/store.py +0 -0
  117. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/exporters/utils.py +0 -0
  118. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/__init__.py +0 -0
  119. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/config.py +0 -0
  120. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/constants.py +0 -0
  121. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/__init__.py +0 -0
  122. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/config.py +0 -0
  123. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_anthropic/wrapper.py +0 -0
  124. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/__init__.py +0 -0
  125. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/config.py +0 -0
  126. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_google/wrapper.py +0 -0
  127. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/__init__.py +0 -0
  128. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/config.py +0 -0
  129. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_openai/wrapper.py +0 -0
  130. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/__init__.py +0 -0
  131. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/config.py +0 -0
  132. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/llm_together/wrapper.py +0 -0
  133. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/llm/providers.py +0 -0
  134. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/managers.py +0 -0
  135. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/processors/__init__.py +0 -0
  136. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/tracer/utils.py +0 -0
  137. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/__init__.py +0 -0
  138. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/base_trainer.py +0 -0
  139. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/config.py +0 -0
  140. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/console.py +0 -0
  141. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/fireworks_trainer.py +0 -0
  142. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/trainable_model.py +0 -0
  143. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/trainer/trainer.py +0 -0
  144. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/async_utils.py +0 -0
  145. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/decorators/__init__.py +0 -0
  146. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/decorators/dont_throw.py +0 -0
  147. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/decorators/use_once.py +0 -0
  148. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/file_utils.py +0 -0
  149. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/guards.py +0 -0
  150. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/meta.py +0 -0
  151. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/project.py +0 -0
  152. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/testing.py +0 -0
  153. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/url.py +0 -0
  154. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/version_check.py +0 -0
  155. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/README.md +0 -0
  156. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/__init__.py +0 -0
  157. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_async.py +0 -0
  158. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_async_iterator.py +0 -0
  159. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_sync.py +0 -0
  160. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +0 -0
  161. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/mutable_wrap_async.py +0 -0
  162. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/mutable_wrap_sync.py +0 -0
  163. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/py.typed +0 -0
  164. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/utils/wrappers/utils.py +0 -0
  165. {judgeval-0.20.1 → judgeval-0.21.0}/src/judgeval/warnings.py +0 -0
  166. {judgeval-0.20.1 → judgeval-0.21.0}/update_version.py +0 -0
  167. {judgeval-0.20.1 → judgeval-0.21.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.20.1
3
+ Version: 0.21.0
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "judgeval"
3
- version = "0.20.1"
3
+ version = "0.21.0"
4
4
  authors = [
5
5
  { name = "Andrew Li", email = "andrew@judgmentlabs.ai" },
6
6
  { name = "Alex Shan", email = "alex@judgmentlabs.ai" },
@@ -26,18 +26,19 @@ class AttributeKeys(str, Enum):
26
26
 
27
27
  PENDING_TRACE_EVAL = "judgment.pending_trace_eval"
28
28
 
29
+ JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
30
+ JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
31
+ JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
32
+ JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
33
+ "judgment.usage.cache_creation_input_tokens"
34
+ )
35
+ JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
36
+ JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
37
+ JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"
38
+
29
39
  GEN_AI_PROMPT = "gen_ai.prompt"
30
40
  GEN_AI_COMPLETION = "gen_ai.completion"
31
- GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
32
- GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
33
41
  GEN_AI_SYSTEM = "gen_ai.system"
34
- GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
35
- GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
36
- GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
37
- "gen_ai.usage.cache_creation_input_tokens"
38
- )
39
- GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
40
-
41
42
  GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
42
43
  GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
43
44
  GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
@@ -95,7 +95,7 @@ def _wrap_non_streaming_sync(
95
95
  )
96
96
  ctx["model_name"] = kwargs.get("model", "")
97
97
  set_span_attribute(
98
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
98
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
99
99
  )
100
100
 
101
101
  def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -112,17 +112,19 @@ def _wrap_non_streaming_sync(
112
112
  _extract_anthropic_tokens(result.usage)
113
113
  )
114
114
  set_span_attribute(
115
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
115
+ span,
116
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
117
+ prompt_tokens,
116
118
  )
117
119
  set_span_attribute(
118
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
120
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
119
121
  )
120
122
  set_span_attribute(
121
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
123
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
122
124
  )
123
125
  set_span_attribute(
124
126
  span,
125
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
127
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
126
128
  cache_creation,
127
129
  )
128
130
  set_span_attribute(
@@ -133,7 +135,7 @@ def _wrap_non_streaming_sync(
133
135
 
134
136
  set_span_attribute(
135
137
  span,
136
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
138
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
137
139
  result.model,
138
140
  )
139
141
 
@@ -169,7 +171,7 @@ def _wrap_streaming_sync(
169
171
  )
170
172
  ctx["model_name"] = kwargs.get("model", "")
171
173
  set_span_attribute(
172
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
174
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
173
175
  )
174
176
  ctx["accumulated_content"] = ""
175
177
 
@@ -197,17 +199,21 @@ def _wrap_streaming_sync(
197
199
  _extract_anthropic_tokens(usage_data)
198
200
  )
199
201
  set_span_attribute(
200
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
202
+ span,
203
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
204
+ prompt_tokens,
201
205
  )
202
206
  set_span_attribute(
203
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
207
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
204
208
  )
205
209
  set_span_attribute(
206
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
210
+ span,
211
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
212
+ cache_read,
207
213
  )
208
214
  set_span_attribute(
209
215
  span,
210
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
216
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
211
217
  cache_creation,
212
218
  )
213
219
  set_span_attribute(
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
279
285
  )
280
286
  ctx["model_name"] = kwargs.get("model", "")
281
287
  set_span_attribute(
282
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
288
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
283
289
  )
284
290
 
285
291
  def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -296,17 +302,19 @@ def _wrap_non_streaming_async(
296
302
  _extract_anthropic_tokens(result.usage)
297
303
  )
298
304
  set_span_attribute(
299
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
305
+ span,
306
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
307
+ prompt_tokens,
300
308
  )
301
309
  set_span_attribute(
302
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
310
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
303
311
  )
304
312
  set_span_attribute(
305
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
313
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
306
314
  )
307
315
  set_span_attribute(
308
316
  span,
309
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
317
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
310
318
  cache_creation,
311
319
  )
312
320
  set_span_attribute(
@@ -317,7 +325,7 @@ def _wrap_non_streaming_async(
317
325
 
318
326
  set_span_attribute(
319
327
  span,
320
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
328
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
321
329
  result.model,
322
330
  )
323
331
 
@@ -354,7 +362,7 @@ def _wrap_streaming_async(
354
362
  )
355
363
  ctx["model_name"] = kwargs.get("model", "")
356
364
  set_span_attribute(
357
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
365
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
358
366
  )
359
367
  ctx["accumulated_content"] = ""
360
368
 
@@ -382,17 +390,21 @@ def _wrap_streaming_async(
382
390
  _extract_anthropic_tokens(usage_data)
383
391
  )
384
392
  set_span_attribute(
385
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
393
+ span,
394
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
395
+ prompt_tokens,
386
396
  )
387
397
  set_span_attribute(
388
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
398
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
389
399
  )
390
400
  set_span_attribute(
391
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
401
+ span,
402
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
403
+ cache_read,
392
404
  )
393
405
  set_span_attribute(
394
406
  span,
395
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
407
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
396
408
  cache_creation,
397
409
  )
398
410
  set_span_attribute(
@@ -44,7 +44,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
44
44
 
45
45
  ctx["model_name"] = kwargs.get("model", "")
46
46
  set_span_attribute(
47
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
47
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
48
48
  )
49
49
  ctx["accumulated_content"] = ""
50
50
 
@@ -125,22 +125,22 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
125
125
  ) = _extract_anthropic_tokens(final_message.usage)
126
126
  set_span_attribute(
127
127
  span,
128
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
128
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
129
129
  prompt_tokens,
130
130
  )
131
131
  set_span_attribute(
132
132
  span,
133
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
133
+ AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
134
134
  completion_tokens,
135
135
  )
136
136
  set_span_attribute(
137
137
  span,
138
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
138
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
139
139
  cache_read,
140
140
  )
141
141
  set_span_attribute(
142
142
  span,
143
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
143
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
144
144
  cache_creation,
145
145
  )
146
146
  set_span_attribute(
@@ -151,7 +151,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
151
151
 
152
152
  set_span_attribute(
153
153
  span,
154
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
154
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
155
155
  final_message.model,
156
156
  )
157
157
  except Exception:
@@ -190,7 +190,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
190
190
 
191
191
  ctx["model_name"] = kwargs.get("model", "")
192
192
  set_span_attribute(
193
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
193
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
194
194
  )
195
195
  ctx["accumulated_content"] = ""
196
196
 
@@ -271,22 +271,22 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
271
271
  ) = _extract_anthropic_tokens(final_message.usage)
272
272
  set_span_attribute(
273
273
  span,
274
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
274
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
275
275
  prompt_tokens,
276
276
  )
277
277
  set_span_attribute(
278
278
  span,
279
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
279
+ AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
280
280
  completion_tokens,
281
281
  )
282
282
  set_span_attribute(
283
283
  span,
284
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
284
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
285
285
  cache_read,
286
286
  )
287
287
  set_span_attribute(
288
288
  span,
289
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
289
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
290
290
  cache_creation,
291
291
  )
292
292
  set_span_attribute(
@@ -297,7 +297,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
297
297
 
298
298
  set_span_attribute(
299
299
  span,
300
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
300
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
301
301
  final_message.model,
302
302
  )
303
303
  except Exception:
@@ -63,7 +63,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
63
63
  )
64
64
  ctx["model_name"] = kwargs.get("model", "")
65
65
  set_span_attribute(
66
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
66
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
67
67
  )
68
68
 
69
69
  def post_hook(ctx: Dict[str, Any], result: GenerateContentResponse) -> None:
@@ -79,17 +79,19 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
79
79
  _extract_google_tokens(usage_data)
80
80
  )
81
81
  set_span_attribute(
82
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
82
+ span,
83
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
84
+ prompt_tokens,
83
85
  )
84
86
  set_span_attribute(
85
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
87
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
86
88
  )
87
89
  set_span_attribute(
88
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
90
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
89
91
  )
90
92
  set_span_attribute(
91
93
  span,
92
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
94
+ AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
93
95
  cache_creation,
94
96
  )
95
97
  set_span_attribute(
@@ -100,7 +102,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
100
102
 
101
103
  set_span_attribute(
102
104
  span,
103
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
105
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
104
106
  result.model_version if result.model_version else ctx["model_name"],
105
107
  )
106
108
 
@@ -16,6 +16,7 @@ from judgeval.utils.wrappers import (
16
16
  immutable_wrap_sync,
17
17
  immutable_wrap_async,
18
18
  )
19
+ from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
19
20
 
20
21
  if TYPE_CHECKING:
21
22
  from judgeval.tracer import Tracer
@@ -45,7 +46,7 @@ def _wrap_beta_non_streaming_sync(
45
46
  )
46
47
  ctx["model_name"] = kwargs.get("model", "")
47
48
  set_span_attribute(
48
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
49
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
49
50
  )
50
51
 
51
52
  def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -66,17 +67,29 @@ def _wrap_beta_non_streaming_sync(
66
67
  if prompt_tokens_details:
67
68
  cache_read = prompt_tokens_details.cached_tokens or 0
68
69
 
70
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
71
+ openai_tokens_converter(
72
+ prompt_tokens,
73
+ completion_tokens,
74
+ cache_read,
75
+ 0,
76
+ usage_data.total_tokens,
77
+ )
78
+ )
79
+
69
80
  set_span_attribute(
70
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
81
+ span,
82
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
83
+ prompt_tokens,
71
84
  )
72
85
  set_span_attribute(
73
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
86
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
74
87
  )
75
88
  set_span_attribute(
76
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
89
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
77
90
  )
78
91
  set_span_attribute(
79
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
92
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
80
93
  )
81
94
  set_span_attribute(
82
95
  span,
@@ -86,7 +99,7 @@ def _wrap_beta_non_streaming_sync(
86
99
 
87
100
  set_span_attribute(
88
101
  span,
89
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
102
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
90
103
  result.model or ctx["model_name"],
91
104
  )
92
105
 
@@ -128,7 +141,7 @@ def _wrap_beta_non_streaming_async(
128
141
  )
129
142
  ctx["model_name"] = kwargs.get("model", "")
130
143
  set_span_attribute(
131
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
144
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
132
145
  )
133
146
 
134
147
  def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -149,17 +162,28 @@ def _wrap_beta_non_streaming_async(
149
162
  if prompt_tokens_details:
150
163
  cache_read = prompt_tokens_details.cached_tokens or 0
151
164
 
165
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
166
+ openai_tokens_converter(
167
+ prompt_tokens,
168
+ completion_tokens,
169
+ cache_read,
170
+ 0,
171
+ usage_data.total_tokens,
172
+ )
173
+ )
152
174
  set_span_attribute(
153
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
175
+ span,
176
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
177
+ prompt_tokens,
154
178
  )
155
179
  set_span_attribute(
156
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
180
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
157
181
  )
158
182
  set_span_attribute(
159
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
183
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
160
184
  )
161
185
  set_span_attribute(
162
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
186
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
163
187
  )
164
188
  set_span_attribute(
165
189
  span,
@@ -169,7 +193,7 @@ def _wrap_beta_non_streaming_async(
169
193
 
170
194
  set_span_attribute(
171
195
  span,
172
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
196
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
173
197
  result.model or ctx["model_name"],
174
198
  )
175
199
 
@@ -25,6 +25,7 @@ from judgeval.utils.wrappers import (
25
25
  immutable_wrap_sync_iterator,
26
26
  immutable_wrap_async_iterator,
27
27
  )
28
+ from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
28
29
 
29
30
  if TYPE_CHECKING:
30
31
  from judgeval.tracer import Tracer
@@ -68,7 +69,7 @@ def _wrap_non_streaming_sync(
68
69
  )
69
70
  ctx["model_name"] = kwargs.get("model", "")
70
71
  set_span_attribute(
71
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
72
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
72
73
  )
73
74
 
74
75
  def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -89,17 +90,29 @@ def _wrap_non_streaming_sync(
89
90
  if prompt_tokens_details:
90
91
  cache_read = prompt_tokens_details.cached_tokens or 0
91
92
 
93
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
94
+ openai_tokens_converter(
95
+ prompt_tokens,
96
+ completion_tokens,
97
+ cache_read,
98
+ 0,
99
+ usage_data.total_tokens,
100
+ )
101
+ )
102
+
92
103
  set_span_attribute(
93
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
104
+ span,
105
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
106
+ prompt_tokens,
94
107
  )
95
108
  set_span_attribute(
96
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
109
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
97
110
  )
98
111
  set_span_attribute(
99
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
112
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
100
113
  )
101
114
  set_span_attribute(
102
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
115
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
103
116
  )
104
117
  set_span_attribute(
105
118
  span,
@@ -109,7 +122,7 @@ def _wrap_non_streaming_sync(
109
122
 
110
123
  set_span_attribute(
111
124
  span,
112
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
125
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
113
126
  result.model or ctx["model_name"],
114
127
  )
115
128
 
@@ -145,7 +158,7 @@ def _wrap_streaming_sync(
145
158
  )
146
159
  ctx["model_name"] = kwargs.get("model", "")
147
160
  set_span_attribute(
148
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
161
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
149
162
  )
150
163
  ctx["accumulated_content"] = ""
151
164
 
@@ -182,17 +195,31 @@ def _wrap_streaming_sync(
182
195
  if chunk.usage.prompt_tokens_details:
183
196
  cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
184
197
 
198
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
199
+ openai_tokens_converter(
200
+ prompt_tokens,
201
+ completion_tokens,
202
+ cache_read,
203
+ 0,
204
+ chunk.usage.total_tokens,
205
+ )
206
+ )
207
+
185
208
  set_span_attribute(
186
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
209
+ span,
210
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
211
+ prompt_tokens,
187
212
  )
188
213
  set_span_attribute(
189
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
214
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
190
215
  )
191
216
  set_span_attribute(
192
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
217
+ span,
218
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
219
+ cache_read,
193
220
  )
194
221
  set_span_attribute(
195
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
222
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
196
223
  )
197
224
  set_span_attribute(
198
225
  span,
@@ -264,7 +291,7 @@ def _wrap_non_streaming_async(
264
291
  )
265
292
  ctx["model_name"] = kwargs.get("model", "")
266
293
  set_span_attribute(
267
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
294
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
268
295
  )
269
296
 
270
297
  def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -285,17 +312,29 @@ def _wrap_non_streaming_async(
285
312
  if prompt_tokens_details:
286
313
  cache_read = prompt_tokens_details.cached_tokens or 0
287
314
 
315
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
316
+ openai_tokens_converter(
317
+ prompt_tokens,
318
+ completion_tokens,
319
+ cache_read,
320
+ 0,
321
+ usage_data.total_tokens,
322
+ )
323
+ )
324
+
288
325
  set_span_attribute(
289
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
326
+ span,
327
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
328
+ prompt_tokens,
290
329
  )
291
330
  set_span_attribute(
292
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
331
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
293
332
  )
294
333
  set_span_attribute(
295
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
334
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
296
335
  )
297
336
  set_span_attribute(
298
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
337
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
299
338
  )
300
339
  set_span_attribute(
301
340
  span,
@@ -305,7 +344,7 @@ def _wrap_non_streaming_async(
305
344
 
306
345
  set_span_attribute(
307
346
  span,
308
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
347
+ AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
309
348
  result.model or ctx["model_name"],
310
349
  )
311
350
 
@@ -342,7 +381,7 @@ def _wrap_streaming_async(
342
381
  )
343
382
  ctx["model_name"] = kwargs.get("model", "")
344
383
  set_span_attribute(
345
- ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
384
+ ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
346
385
  )
347
386
  ctx["accumulated_content"] = ""
348
387
 
@@ -379,17 +418,31 @@ def _wrap_streaming_async(
379
418
  if chunk.usage.prompt_tokens_details:
380
419
  cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
381
420
 
421
+ prompt_tokens, completion_tokens, cache_read, cache_creation = (
422
+ openai_tokens_converter(
423
+ prompt_tokens,
424
+ completion_tokens,
425
+ cache_read,
426
+ 0,
427
+ chunk.usage.total_tokens,
428
+ )
429
+ )
430
+
382
431
  set_span_attribute(
383
- span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
432
+ span,
433
+ AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
434
+ prompt_tokens,
384
435
  )
385
436
  set_span_attribute(
386
- span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
437
+ span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
387
438
  )
388
439
  set_span_attribute(
389
- span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
440
+ span,
441
+ AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
442
+ cache_read,
390
443
  )
391
444
  set_span_attribute(
392
- span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
445
+ span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
393
446
  )
394
447
  set_span_attribute(
395
448
  span,