judgeval 0.16.9__py3-none-any.whl → 0.22.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (37)
  1. judgeval/__init__.py +32 -2
  2. judgeval/api/__init__.py +108 -0
  3. judgeval/api/api_types.py +76 -15
  4. judgeval/cli.py +16 -1
  5. judgeval/data/judgment_types.py +76 -20
  6. judgeval/dataset/__init__.py +11 -2
  7. judgeval/env.py +2 -11
  8. judgeval/evaluation/__init__.py +4 -0
  9. judgeval/prompt/__init__.py +330 -0
  10. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +1 -13
  11. judgeval/tracer/__init__.py +371 -257
  12. judgeval/tracer/constants.py +1 -1
  13. judgeval/tracer/exporters/store.py +32 -16
  14. judgeval/tracer/keys.py +11 -9
  15. judgeval/tracer/llm/llm_anthropic/messages.py +38 -26
  16. judgeval/tracer/llm/llm_anthropic/messages_stream.py +14 -14
  17. judgeval/tracer/llm/llm_google/generate_content.py +9 -7
  18. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +38 -14
  19. judgeval/tracer/llm/llm_openai/chat_completions.py +90 -26
  20. judgeval/tracer/llm/llm_openai/responses.py +88 -26
  21. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  22. judgeval/tracer/llm/llm_together/chat_completions.py +26 -18
  23. judgeval/tracer/managers.py +4 -0
  24. judgeval/trainer/__init__.py +10 -1
  25. judgeval/trainer/base_trainer.py +122 -0
  26. judgeval/trainer/config.py +1 -1
  27. judgeval/trainer/fireworks_trainer.py +396 -0
  28. judgeval/trainer/trainer.py +52 -387
  29. judgeval/utils/guards.py +9 -5
  30. judgeval/utils/project.py +15 -0
  31. judgeval/utils/serialize.py +2 -2
  32. judgeval/version.py +1 -1
  33. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/METADATA +2 -3
  34. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/RECORD +37 -32
  35. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/WHEEL +0 -0
  36. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/entry_points.txt +0 -0
  37. {judgeval-0.16.9.dist-info → judgeval-0.22.2.dist-info}/licenses/LICENSE.md +0 -0
judgeval/tracer/llm/llm_openai/chat_completions.py

@@ -25,6 +25,10 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import (
+    openai_tokens_converter,
+    set_cost_attribute,
+)
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -62,13 +66,13 @@ def _wrap_non_streaming_sync(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -89,17 +93,31 @@ def _wrap_non_streaming_sync(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        set_cost_attribute(span, usage_data)
+
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -109,7 +127,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -139,13 +157,13 @@ def _wrap_streaming_sync(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -182,17 +200,33 @@ def _wrap_streaming_sync(
            if chunk.usage.prompt_tokens_details:
                cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+           set_cost_attribute(span, chunk.usage)
+
+           prompt_tokens, completion_tokens, cache_read, cache_creation = (
+               openai_tokens_converter(
+                   prompt_tokens,
+                   completion_tokens,
+                   cache_read,
+                   0,
+                   chunk.usage.total_tokens,
+               )
+           )
+
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+               span,
+               AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+               prompt_tokens,
            )
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+               span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
            )
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+               span,
+               AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+               cache_read,
            )
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+               span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
            )
            set_span_attribute(
                span,
@@ -258,13 +292,13 @@ def _wrap_non_streaming_async(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -285,17 +319,31 @@ def _wrap_non_streaming_async(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        set_cost_attribute(span, usage_data)
+
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -305,7 +353,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -336,13 +384,13 @@ def _wrap_streaming_async(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -379,17 +427,33 @@ def _wrap_streaming_async(
            if chunk.usage.prompt_tokens_details:
                cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+           set_cost_attribute(span, chunk.usage)
+
+           prompt_tokens, completion_tokens, cache_read, cache_creation = (
+               openai_tokens_converter(
+                   prompt_tokens,
+                   completion_tokens,
+                   cache_read,
+                   0,
+                   chunk.usage.total_tokens,
+               )
+           )
+
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+               span,
+               AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+               prompt_tokens,
            )
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+               span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
            )
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+               span,
+               AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+               cache_read,
            )
            set_span_attribute(
-               span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+               span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
            )
            set_span_attribute(
                span,
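
Across these hunks the pattern is the same: raw OpenAI usage counts are now passed through openai_tokens_converter (defined in the new judgeval/tracer/llm/llm_openai/utils.py, shown at the end of this diff) before being written to the span, and the usage attributes move from the generic GEN_AI_* keys to JUDGMENT_* keys. A minimal sketch of the converter's arithmetic, with illustrative numbers that do not come from the diff:

    # Chat Completions reports prompt_tokens INCLUDING cached tokens, so the
    # component sum exceeds total_tokens and the converter subtracts the cache.
    prompt_tokens, completion_tokens = 100, 60
    cache_read, cache_creation = 40, 0
    total_tokens = 160  # as reported by the API

    manual = prompt_tokens + completion_tokens + cache_read + cache_creation  # 200
    if manual > total_tokens:        # 200 > 160: take the subtraction branch
        prompt_tokens -= cache_read  # 100 - 40 = 60 non-cached input tokens
    # The span then records: 60 non-cached input, 60 output, 40 cache-read,
    # and 0 cache-creation tokens.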
judgeval/tracer/llm/llm_openai/responses.py

@@ -24,6 +24,10 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import (
+    openai_tokens_converter,
+    set_cost_attribute,
+)
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -56,13 +60,13 @@ def _wrap_responses_non_streaming_sync(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -80,17 +84,30 @@ def _wrap_responses_non_streaming_sync(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        set_cost_attribute(span, usage_data)
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -101,7 +118,7 @@ def _wrap_responses_non_streaming_sync(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -131,13 +148,13 @@ def _wrap_responses_streaming_sync(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -167,6 +184,7 @@ def _wrap_responses_streaming_sync(
            ):
                prompt_tokens = chunk.response.usage.input_tokens or 0
                completion_tokens = chunk.response.usage.output_tokens or 0
+               total_tokens = chunk.response.usage.total_tokens or 0
                # Safely access nested cached_tokens
                input_tokens_details = getattr(
                    chunk.response.usage, "input_tokens_details", None
@@ -177,21 +195,36 @@ def _wrap_responses_streaming_sync(
                    else 0
                )
 
+               set_cost_attribute(span, chunk.response.usage)
+               prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                   openai_tokens_converter(
+                       prompt_tokens,
+                       completion_tokens,
+                       cache_read,
+                       0,
+                       total_tokens,
+                   )
+               )
+
                set_span_attribute(
-                   span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                   span,
+                   AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                   prompt_tokens,
                )
                set_span_attribute(
                    span,
-                   AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                   AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    completion_tokens,
                )
                set_span_attribute(
                    span,
-                   AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                   AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    cache_read,
                )
                set_span_attribute(
-                   span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                   span,
+                   AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                   0,
                )
                set_span_attribute(
                    span,
@@ -260,13 +293,13 @@ def _wrap_responses_non_streaming_async(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -284,17 +317,30 @@ def _wrap_responses_non_streaming_async(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        set_cost_attribute(span, usage_data)
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -305,7 +351,7 @@ def _wrap_responses_non_streaming_async(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -335,13 +381,13 @@ def _wrap_responses_streaming_async(
         ctx["span"] = tracer.get_tracer().start_span(
             "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
         )
-        tracer.add_agent_attributes_to_span(ctx["span"])
+        tracer._inject_judgment_context(ctx["span"])
         set_span_attribute(
             ctx["span"], AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -373,6 +419,7 @@ def _wrap_responses_streaming_async(
            ):
                prompt_tokens = chunk.response.usage.input_tokens or 0
                completion_tokens = chunk.response.usage.output_tokens or 0
+               total_tokens = chunk.response.usage.total_tokens or 0
                # Safely access nested cached_tokens
                input_tokens_details = getattr(
                    chunk.response.usage, "input_tokens_details", None
@@ -383,21 +430,36 @@ def _wrap_responses_streaming_async(
                    else 0
                )
 
+               set_cost_attribute(span, chunk.response.usage)
+               prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                   openai_tokens_converter(
+                       prompt_tokens,
+                       completion_tokens,
+                       cache_read,
+                       0,
+                       total_tokens,
+                   )
+               )
+
                set_span_attribute(
-                   span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                   span,
+                   AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                   prompt_tokens,
                )
                set_span_attribute(
                    span,
-                   AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                   AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                    completion_tokens,
                )
                set_span_attribute(
                    span,
-                   AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                   AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                    cache_read,
                )
                set_span_attribute(
-                   span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                   span,
+                   AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                   0,
                )
                set_span_attribute(
                    span,
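
The responses.py wrappers mirror chat_completions.py; the substantive difference is the usage shape (input_tokens/output_tokens with input_tokens_details here, versus prompt_tokens/completion_tokens with prompt_tokens_details for Chat Completions). A hypothetical helper, not part of this diff, sketching the extraction both wrapper families perform before calling openai_tokens_converter:

    from typing import Any, Tuple

    def extract_usage(usage: Any) -> Tuple[int, int, int, int]:
        """Return (input, output, cached, total) for either usage shape."""
        if hasattr(usage, "prompt_tokens"):  # Chat Completions shape
            prompt = usage.prompt_tokens or 0
            output = usage.completion_tokens or 0
            details = getattr(usage, "prompt_tokens_details", None)
        else:  # Responses shape
            prompt = usage.input_tokens or 0
            output = usage.output_tokens or 0
            details = getattr(usage, "input_tokens_details", None)
        cached = (details.cached_tokens or 0) if details else 0
        return prompt, output, cached, usage.total_tokens or 0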
judgeval/tracer/llm/llm_openai/utils.py (new file)

@@ -0,0 +1,42 @@
+from typing import Any
+from opentelemetry.trace import Span
+from judgeval.tracer.keys import AttributeKeys
+from judgeval.tracer.utils import set_span_attribute
+from judgeval.utils.serialize import safe_serialize
+
+
+def openai_tokens_converter(
+    prompt_tokens: int,
+    completion_tokens: int,
+    cache_read: int,
+    cache_creation: int,
+    total_tokens: int,
+) -> tuple[int, int, int, int]:
+    """
+    Returns:
+        tuple[int, int, int, int]:
+            - judgment.usage.non_cached_input
+            - judgment.usage.output_tokens
+            - judgment.usage.cached_input_tokens
+            - judgment.usage.cache_creation_tokens
+    """
+    manual_tokens = prompt_tokens + completion_tokens + cache_read + cache_creation
+
+    if manual_tokens > total_tokens:
+        # This is the openAI case where we need to subtract the cached tokens from the input tokens
+        return prompt_tokens - cache_read, completion_tokens, cache_read, cache_creation
+    else:
+        return prompt_tokens, completion_tokens, cache_read, cache_creation
+
+
+def set_cost_attribute(span: Span, usage_data: Any) -> None:
+    """
+    This is for OpenRouter case where the cost is provided in the usage data when they specify:
+        extra_body={"usage": {"include": True}},
+    """
+    if hasattr(usage_data, "cost") and usage_data.cost:
+        set_span_attribute(
+            span,
+            AttributeKeys.JUDGMENT_USAGE_TOTAL_COST_USD,
+            safe_serialize(usage_data.cost),
+        )
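
A condensed, self-contained usage sketch of the two helpers above, with a SimpleNamespace standing in for a Chat Completions usage payload and illustrative numbers:

    from types import SimpleNamespace
    from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter

    usage_data = SimpleNamespace(
        prompt_tokens=100,  # includes the cached tokens
        completion_tokens=60,
        prompt_tokens_details=SimpleNamespace(cached_tokens=40),
        total_tokens=160,
        cost=None,  # OpenRouter would populate this when usage include is requested
    )

    non_cached, output, cached, created = openai_tokens_converter(
        usage_data.prompt_tokens,
        usage_data.completion_tokens,
        usage_data.prompt_tokens_details.cached_tokens,
        0,  # OpenAI reports no cache-creation tokens
        usage_data.total_tokens,
    )
    assert (non_cached, output, cached, created) == (60, 60, 40, 0)
    # set_cost_attribute(span, usage_data) would be a no-op here since cost is None.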