judgeval 0.20.0__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.

Potentially problematic release: this version of judgeval has been flagged as possibly problematic.

judgeval/__init__.py CHANGED
@@ -170,8 +170,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 judgeval_logger.error(f"Failed to upload custom scorer: {unique_name}")
                 return False
 
-        except Exception as e:
-            judgeval_logger.error(f"Error uploading custom scorer: {e}")
+        except Exception:
             raise
 
 
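Why the logging was dropped: a bare raise inside an except block re-raises the active exception with its original traceback intact, so the failure can be reported once at the CLI boundary instead of being logged twice. A minimal sketch of the pattern (names hypothetical, not the package's actual method):

    def upload(client, scorer):
        try:
            client.upload(scorer)
        except Exception:
            # No logging here: the caller (e.g. the CLI) reports the failure once.
            raise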
judgeval/cli.py CHANGED
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
 from judgeval.logger import judgeval_logger
 from judgeval import JudgmentClient
 from judgeval.version import get_version
+from judgeval.exceptions import JudgmentAPIError
 
 load_dotenv()
 
@@ -56,8 +57,15 @@ def upload_scorer(
             judgeval_logger.error("Failed to upload custom scorer")
             raise typer.Exit(1)
 
+        judgeval_logger.info("Custom scorer uploaded successfully!")
         raise typer.Exit(0)
-    except Exception:
+    except Exception as e:
+        if isinstance(e, JudgmentAPIError) and e.status_code == 409:
+            judgeval_logger.error(
+                "Duplicate scorer detected. Use --overwrite flag to replace the existing scorer"
+            )
+            raise typer.Exit(1)
+        # Re-raise other exceptions
        raise
 
 
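The CLI now treats an HTTP 409 conflict from the backend as a duplicate-scorer error with a dedicated message, and re-raises everything else. A rough sketch of the same check in user code, assuming (as the diff implies) that JudgmentAPIError carries a status_code attribute; do_upload is a hypothetical helper:

    from judgeval.exceptions import JudgmentAPIError

    try:
        do_upload()  # hypothetical helper that calls the upload endpoint
    except JudgmentAPIError as e:
        if e.status_code == 409:
            print("Scorer already exists; pass --overwrite to replace it")
            raise SystemExit(1)
        raise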
judgeval/tracer/constants.py CHANGED
@@ -1 +1 @@
-JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "opentelemetry.instrumentation.judgeval"
+JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "judgeval"
judgeval/tracer/keys.py CHANGED
@@ -26,18 +26,19 @@ class AttributeKeys(str, Enum):
 
     PENDING_TRACE_EVAL = "judgment.pending_trace_eval"
 
+    JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
+    JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
+    JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
+    JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
+        "judgment.usage.cache_creation_input_tokens"
+    )
+    JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
+    JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
+    JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"
+
     GEN_AI_PROMPT = "gen_ai.prompt"
     GEN_AI_COMPLETION = "gen_ai.completion"
-    GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
-    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
     GEN_AI_SYSTEM = "gen_ai.system"
-    GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
-    GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
-    GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
-        "gen_ai.usage.cache_creation_input_tokens"
-    )
-    GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
-
     GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
     GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
     GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
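The renamed judgment.* keys split input usage into explicit buckets: non-cached input, cache reads, and cache creation, alongside output tokens and total cost. Based on the key names and the openai_tokens_converter added below, the intended invariant appears to be that the buckets sum to the provider's total (the values here are invented for illustration):

    non_cached = 70      # judgment.usage.non_cached_input_tokens
    cache_read = 30      # judgment.usage.cache_read_input_tokens
    cache_creation = 0   # judgment.usage.cache_creation_input_tokens
    output = 50          # judgment.usage.output_tokens
    total = non_cached + cache_read + cache_creation + output  # 150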
judgeval/tracer/llm/llm_anthropic/messages.py CHANGED
@@ -95,7 +95,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -112,17 +112,19 @@ def _wrap_non_streaming_sync(
             _extract_anthropic_tokens(result.usage)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -133,7 +135,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model,
         )
 
@@ -169,7 +171,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -197,17 +199,21 @@ def _wrap_streaming_sync(
             _extract_anthropic_tokens(usage_data)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Message) -> None:
@@ -296,17 +302,19 @@ def _wrap_non_streaming_async(
             _extract_anthropic_tokens(result.usage)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -317,7 +325,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model,
         )
 
@@ -354,7 +362,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -382,17 +390,21 @@ def _wrap_streaming_async(
             _extract_anthropic_tokens(usage_data)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
judgeval/tracer/llm/llm_anthropic/messages_stream.py CHANGED
@@ -44,7 +44,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
 
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -125,22 +125,22 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
         ) = _extract_anthropic_tokens(final_message.usage)
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
             prompt_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
             completion_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
             cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -151,7 +151,7 @@ def wrap_messages_stream_sync(tracer: Tracer, client: Anthropic) -> None:
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             final_message.model,
         )
     except Exception:
@@ -190,7 +190,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
 
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -271,22 +271,22 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
         ) = _extract_anthropic_tokens(final_message.usage)
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
             prompt_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
             completion_tokens,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
             cache_read,
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -297,7 +297,7 @@ def wrap_messages_stream_async(tracer: Tracer, client: AsyncAnthropic) -> None:
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             final_message.model,
         )
     except Exception:
judgeval/tracer/llm/llm_google/generate_content.py CHANGED
@@ -63,7 +63,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: GenerateContentResponse) -> None:
@@ -79,17 +79,19 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
             _extract_google_tokens(usage_data)
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
             cache_creation,
         )
         set_span_attribute(
@@ -100,7 +102,7 @@ def wrap_generate_content_sync(tracer: Tracer, client: Client) -> None:
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model_version if result.model_version else ctx["model_name"],
         )
 
judgeval/tracer/llm/llm_openai/beta_chat_completions.py CHANGED
@@ -16,6 +16,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync,
     immutable_wrap_async,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -45,7 +46,7 @@ def _wrap_beta_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -66,17 +67,29 @@ def _wrap_beta_non_streaming_sync(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -86,7 +99,7 @@ def _wrap_beta_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -128,7 +141,7 @@ def _wrap_beta_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
@@ -149,17 +162,28 @@ def _wrap_beta_non_streaming_async(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -169,7 +193,7 @@ def _wrap_beta_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
judgeval/tracer/llm/llm_openai/chat_completions.py CHANGED
@@ -25,6 +25,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -68,7 +69,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -89,17 +90,29 @@ def _wrap_non_streaming_sync(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -109,7 +122,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -145,7 +158,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -182,17 +195,31 @@ def _wrap_streaming_sync(
         if chunk.usage.prompt_tokens_details:
             cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                chunk.usage.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -264,7 +291,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletion) -> None:
@@ -285,17 +312,29 @@ def _wrap_non_streaming_async(
         if prompt_tokens_details:
             cache_read = prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -305,7 +344,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             result.model or ctx["model_name"],
         )
 
@@ -342,7 +381,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -379,17 +418,31 @@ def _wrap_streaming_async(
         if chunk.usage.prompt_tokens_details:
             cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                chunk.usage.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span,
+            AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
+            cache_read,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
judgeval/tracer/llm/llm_openai/responses.py CHANGED
@@ -24,6 +24,7 @@ from judgeval.utils.wrappers import (
     immutable_wrap_sync_iterator,
     immutable_wrap_async_iterator,
 )
+from judgeval.tracer.llm.llm_openai.utils import openai_tokens_converter
 
 if TYPE_CHECKING:
     from judgeval.tracer import Tracer
@@ -62,7 +63,7 @@ def _wrap_responses_non_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -80,17 +81,29 @@ def _wrap_responses_non_streaming_sync(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
         )
         set_span_attribute(
             span,
@@ -101,7 +114,7 @@ def _wrap_responses_non_streaming_sync(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -137,7 +150,7 @@ def _wrap_responses_streaming_sync(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -167,6 +180,7 @@ def _wrap_responses_streaming_sync(
         ):
             prompt_tokens = chunk.response.usage.input_tokens or 0
             completion_tokens = chunk.response.usage.output_tokens or 0
+            total_tokens = chunk.response.usage.total_tokens or 0
             # Safely access nested cached_tokens
             input_tokens_details = getattr(
                 chunk.response.usage, "input_tokens_details", None
@@ -177,21 +191,35 @@ def _wrap_responses_streaming_sync(
                 else 0
             )
 
+            prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                openai_tokens_converter(
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read,
+                    0,
+                    total_tokens,
+                )
+            )
+
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                span,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                0,
             )
             set_span_attribute(
                 span,
@@ -266,7 +294,7 @@ def _wrap_responses_non_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
 
     def post_hook(ctx: Dict[str, Any], result: Response) -> None:
@@ -284,17 +312,29 @@ def _wrap_responses_non_streaming_async(
         completion_tokens = usage_data.output_tokens or 0
         cache_read = usage_data.input_tokens_details.cached_tokens or 0
 
+        prompt_tokens, completion_tokens, cache_read, cache_creation = (
+            openai_tokens_converter(
+                prompt_tokens,
+                completion_tokens,
+                cache_read,
+                0,
+                usage_data.total_tokens,
+            )
+        )
+
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+            span, AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
        )
         set_span_attribute(
             span,
@@ -305,7 +345,7 @@ def _wrap_responses_non_streaming_async(
         if hasattr(result, "model"):
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
                 result.model or ctx["model_name"],
             )
 
@@ -341,7 +381,7 @@ def _wrap_responses_streaming_async(
         )
         ctx["model_name"] = kwargs.get("model", "")
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, ctx["model_name"]
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
         )
         ctx["accumulated_content"] = ""
 
@@ -373,6 +413,7 @@ def _wrap_responses_streaming_async(
         ):
             prompt_tokens = chunk.response.usage.input_tokens or 0
             completion_tokens = chunk.response.usage.output_tokens or 0
+            total_tokens = chunk.response.usage.total_tokens or 0
             # Safely access nested cached_tokens
             input_tokens_details = getattr(
                 chunk.response.usage, "input_tokens_details", None
@@ -383,21 +424,35 @@ def _wrap_responses_streaming_async(
                 else 0
             )
 
+            prompt_tokens, completion_tokens, cache_read, cache_creation = (
+                openai_tokens_converter(
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read,
+                    0,
+                    total_tokens,
+                )
+            )
+
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+                span,
+                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+                prompt_tokens,
            )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS,
                 completion_tokens,
             )
             set_span_attribute(
                 span,
-                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS,
                 cache_read,
             )
             set_span_attribute(
-                span, AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
+                span,
+                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                0,
             )
             set_span_attribute(
                 span,
judgeval/tracer/llm/llm_openai/utils.py ADDED
@@ -0,0 +1,22 @@
+def openai_tokens_converter(
+    prompt_tokens: int,
+    completion_tokens: int,
+    cache_read: int,
+    cache_creation: int,
+    total_tokens: int,
+) -> tuple[int, int, int, int]:
+    """
+    Returns:
+        tuple[int, int, int, int]:
+        - judgment.usage.non_cached_input
+        - judgment.usage.output_tokens
+        - judgment.usage.cached_input_tokens
+        - judgment.usage.cache_creation_tokens
+    """
+    manual_tokens = prompt_tokens + completion_tokens + cache_read + cache_creation
+
+    if manual_tokens > total_tokens:
+        # This is the openAI case where we need to subtract the cached tokens from the input tokens
+        return prompt_tokens - cache_read, completion_tokens, cache_read, cache_creation
+    else:
+        return prompt_tokens, completion_tokens, cache_read, cache_creation
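openai_tokens_converter reconciles two reporting conventions: OpenAI reports prompt/input tokens inclusive of cached tokens, so when the manual sum of the four buckets exceeds total_tokens the cached portion is subtracted from the input count; providers that already report non-cached input separately fall through unchanged. A worked example (values invented):

    # OpenAI-style usage: prompt_tokens=100 already includes cache_read=30,
    # so 100 + 50 + 30 + 0 = 180 > total_tokens=150 and the overlap is removed.
    openai_tokens_converter(100, 50, 30, 0, 150)  # -> (70, 50, 30, 0)

    # Already-split usage: 100 + 50 + 30 + 0 = 180 <= total_tokens=180,
    # so the values pass through as-is.
    openai_tokens_converter(100, 50, 30, 0, 180)  # -> (100, 50, 30, 0)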
judgeval/tracer/llm/llm_together/chat_completions.py CHANGED
@@ -73,7 +73,7 @@ def _wrap_non_streaming_sync(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
@@ -90,10 +90,12 @@ def _wrap_non_streaming_sync(
             result.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -103,7 +105,7 @@ def _wrap_non_streaming_sync(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             ctx["model_name"],
         )
 
@@ -143,7 +145,7 @@ def _wrap_streaming_sync(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
         ctx["accumulated_content"] = ""
 
@@ -171,10 +173,12 @@ def _wrap_streaming_sync(
             chunk.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -249,7 +253,7 @@ def _wrap_non_streaming_async(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
 
     def post_hook(ctx: Dict[str, Any], result: ChatCompletionResponse) -> None:
@@ -266,10 +270,12 @@ def _wrap_non_streaming_async(
             result.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
@@ -279,7 +285,7 @@ def _wrap_non_streaming_async(
 
         set_span_attribute(
             span,
-            AttributeKeys.GEN_AI_RESPONSE_MODEL,
+            AttributeKeys.JUDGMENT_LLM_MODEL_NAME,
             ctx["model_name"],
         )
 
@@ -320,7 +326,7 @@ def _wrap_streaming_async(
         )
         ctx["model_name"] = prefixed_model_name
         set_span_attribute(
-            ctx["span"], AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
+            ctx["span"], AttributeKeys.JUDGMENT_LLM_MODEL_NAME, prefixed_model_name
         )
         ctx["accumulated_content"] = ""
 
@@ -348,10 +354,12 @@ def _wrap_streaming_async(
             chunk.usage
         )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens
+            span,
+            AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
+            prompt_tokens,
        )
         set_span_attribute(
-            span, AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens
+            span, AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
         )
         set_span_attribute(
             span,
judgeval/utils/serialize.py CHANGED
@@ -247,7 +247,7 @@ encoders_by_class_tuples = generate_encoders_by_class_tuples(ENCODERS_BY_TYPE)
 # Seralize arbitrary object to a json string
 def safe_serialize(obj: Any) -> str:
     try:
-        return orjson.dumps(json_encoder(obj)).decode()
+        return orjson.dumps(json_encoder(obj), option=orjson.OPT_NON_STR_KEYS).decode()
     except Exception as e:
         judgeval_logger.warning(f"Error serializing object: {e}")
-        return orjson.dumps(repr(obj)).decode()
+        return repr(obj)
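Two behavior changes here: orjson.dumps rejects non-string dict keys by default, and OPT_NON_STR_KEYS makes it coerce supported key types (int, float, UUID, datetime, and so on) to strings instead of raising; and on failure the function now returns the plain repr rather than a JSON-quoted repr. A quick illustration:

    import orjson

    # orjson.dumps({1: "a"}) raises JSONEncodeError by default
    orjson.dumps({1: "a"}, option=orjson.OPT_NON_STR_KEYS)  # b'{"1":"a"}'

    # Old fallback double-encoded the repr; the new one returns it directly:
    # orjson.dumps(repr([1])).decode() == '"[1]"'  vs  repr([1]) == '[1]'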
judgeval/version.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.20.0"
+__version__ = "0.22.0"
 
 
 def get_version() -> str:
judgeval-0.22.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: judgeval
-Version: 0.20.0
+Version: 0.22.0
 Summary: Judgeval Package
 Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
 Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues
judgeval-0.22.0.dist-info/RECORD CHANGED
@@ -1,10 +1,10 @@
-judgeval/__init__.py,sha256=GzRWhiT4QKa8bSAF3wMfPfGaN-_WwkRmtfie03vnLLc,6748
-judgeval/cli.py,sha256=bkwsDqX0sdfChLxm9aTLAIw0sPYv-fUbjmaFeBgPgk8,1803
+judgeval/__init__.py,sha256=RRiBbXUj7M1VW3NqFvMZlXyI72duh3VA5bfIWqPmKNw,6670
+judgeval/cli.py,sha256=T9nKO9eHMOiLCgxaxuihqtRHsG_dMT06sW6X873MmnI,2209
 judgeval/constants.py,sha256=JZZJ1MqzZZDVk-5PRPRbmLnM8mXI-RDL5vxa1JFuscs,3408
 judgeval/env.py,sha256=uFggNNKmfDaa5dmZMwwXVIDdHAHe524jDWUpByV4hm4,1879
 judgeval/exceptions.py,sha256=tTbfe4yoOtPXmn22UQz9-6a-5PT9uOko85xaRRwr0Sw,621
 judgeval/logger.py,sha256=VP5blbsJ53mvJbNHfBf5p2KrARUrkrErpPkB-__Hh3U,1562
-judgeval/version.py,sha256=Gd2zP_LUu1hS1WAozlH-tSGeW3sqNN8PHFKbLHXGpoY,74
+judgeval/version.py,sha256=JT1Ltu1j1xi9sHf7Inhosvl-1tp0QPms04o2RCvwJUI,74
 judgeval/warnings.py,sha256=LbGte14ppiFjrkp-JJYueZ40NWFvMkWRvPXr6r-fUWw,73
 judgeval/api/__init__.py,sha256=dGZm9KtgLMnmbiyDEJ_D7suuVqmsibR_Cd0YZRJ7qHI,15210
 judgeval/api/api_types.py,sha256=PJ5ZQWuvCl5GXFzhcpOw6Iuktr50lo5BaILmZcAKWfc,10085
@@ -43,8 +43,8 @@ judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py,sha256=ucYOI6ztAjf
 judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py,sha256=V3RdrWhnR_vLBrtWw7QbgN9K_A-Och7-v9I2fN4z8gY,506
 judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py,sha256=WhSkPs8tWyQ_cS-y-VTzrKAPlizKp-6zi_DmfgW4AgM,10773
 judgeval/tracer/__init__.py,sha256=E1rcegwIQyqEIvM4yfPLm2M0B6e27dhPrJPhMotBjtw,37432
-judgeval/tracer/constants.py,sha256=ae8tivAW97awJQxdRB9OMqX50wOLX3zqChT_AGkPBu0,85
-judgeval/tracer/keys.py,sha256=G2Qgb5ZlFsZvXPMylh-OLhHSnWYQ23g0GdtY9n9XuoE,2280
+judgeval/tracer/constants.py,sha256=tLR5ClDaNlNg_MAv2XRdk62uQW4KyBnWaNbG_YYblTc,55
+judgeval/tracer/keys.py,sha256=mYBo_X6-rC9xfiI-WpjHlO7rUtcMORtQXCQyO1F3Ycc,2387
 judgeval/tracer/managers.py,sha256=NEkovnK8Qaod1U5msT0_hyHUqo9C2uFro2IzNlC8jCo,5071
 judgeval/tracer/utils.py,sha256=xWha5iwC733wCf2HKbNqzxOPS1ovO1OymWIUFLz-UpQ,537
 judgeval/tracer/exporters/__init__.py,sha256=3WDXC28iY5gYMM5s7ejmy7P-DVDQ_iIuzwovZxUKJXg,1295
@@ -57,21 +57,22 @@ judgeval/tracer/llm/constants.py,sha256=IWa3CMes8wIt_UG7jrGEOztg2sHz54fdOMWIOOr-
 judgeval/tracer/llm/providers.py,sha256=VAimkmChOOjhC1cUv-0iG8pa5PhOw1HIOyt3zrIrbcM,628
 judgeval/tracer/llm/llm_anthropic/__init__.py,sha256=HG0gIlTgaRt-Y0u1ERPQ19pUgb4YHkTh7tZQPeyR4oM,80
 judgeval/tracer/llm/llm_anthropic/config.py,sha256=ICfKODPQvZsRxpK4xWQ-YE79pmWJTmY2wryddxpNdpM,153
-judgeval/tracer/llm/llm_anthropic/messages.py,sha256=U11364nrTt6M58K218uj8AxGPrNwzJ4idhEmZQtFuik,15152
-judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=ZhHigQujU-zHhklgwSVoQYtSsL_7yC5Rwpq9vozekMc,12140
+judgeval/tracer/llm/llm_anthropic/messages.py,sha256=T7dApxJCsOWEpquYSZICACwTioZG3ZcxHdJjvF04T2E,15474
+judgeval/tracer/llm/llm_anthropic/messages_stream.py,sha256=DKlZZnfK_yv_tEMwF2XxvsjgUjOFI3c5JUMQwERNV7k,12188
 judgeval/tracer/llm/llm_anthropic/wrapper.py,sha256=JILcyC4NvjXZSqlFoZp-VB-JsCYZkQPMFEYaB4AysrA,1849
 judgeval/tracer/llm/llm_google/__init__.py,sha256=otBZETsAfVZjtZaN5N36Ln0kw-I9jVB4tFGrV6novHo,74
 judgeval/tracer/llm/llm_google/config.py,sha256=S3yCAE9oHbXjLVYiz5mGD16yIgXMBBUu5UN4lBjoCNQ,162
-judgeval/tracer/llm/llm_google/generate_content.py,sha256=meLWeoZ7J2JtSkpt2Lt8qapYi_mxv0204cXWaFZ0FKs,3973
+judgeval/tracer/llm/llm_google/generate_content.py,sha256=w1rIh1cTBYnkfBQTL4qHntwsKfBcSrf2VSS2y-BOMRU,4030
 judgeval/tracer/llm/llm_google/wrapper.py,sha256=jqaMXGoM9dlPBbCFadMI5EqFrNHzBt0h9VkNn7KPVLk,901
 judgeval/tracer/llm/llm_openai/__init__.py,sha256=CyzwhY0-zmqWKlEno7JPBcvO7G_hI8dp6-_5_KEzFqg,74
-judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=KwetlVexleDSSyRBEezC7Fk5do1Vub3FwLbRhCPgktc,6490
-judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=NWPE_BQTGfTRfsqhzXtNlQAv1Cr2GymolrTGzIbr9Ok,15625
+judgeval/tracer/llm/llm_openai/beta_chat_completions.py,sha256=IXw-Gu-WUxQ-gaBUIe-aAKOn1Pakn_RFl0b1C_1toP8,7326
+judgeval/tracer/llm/llm_openai/chat_completions.py,sha256=HaZEZMla-VbacJTyOYeVNNXsRSSqE2di36NIpyzGBVY,17394
 judgeval/tracer/llm/llm_openai/config.py,sha256=NE0ixKhd4WVeAVjY8jNTncuKYH6R4MQDLPmcCsd3zWY,144
-judgeval/tracer/llm/llm_openai/responses.py,sha256=lhs4yS-rJU255vo5gsJkGFRloYurlfnXIkstjMwR3vA,15875
+judgeval/tracer/llm/llm_openai/responses.py,sha256=5le9rmUtLeVnz10DKS5ajkVcVCHQL3hi1b89pihmoL4,17872
+judgeval/tracer/llm/llm_openai/utils.py,sha256=H32OYE9JeN2Hmyf8qRjqz2ky9QCjSV7KAEKXNAtDWpE,832
 judgeval/tracer/llm/llm_openai/wrapper.py,sha256=Z5Ndib228yd1pXEQ4xIu7_CJHxpW_t0ofZAC6FLc5eU,2055
 judgeval/tracer/llm/llm_together/__init__.py,sha256=MEnsF77IgFD4h73hNCMpo-9a1PHHdm-OxPlOalXOMac,78
-judgeval/tracer/llm/llm_together/chat_completions.py,sha256=KC8sk40l1VDuFStuVGIV1GLLx3vrtPDk5Y2vJsnRe70,14156
+judgeval/tracer/llm/llm_together/chat_completions.py,sha256=RySsK3tqG0NpJHPlVQ705bXxIfseSQUhvIoS-sz4rOg,14380
 judgeval/tracer/llm/llm_together/config.py,sha256=jCJY0KQcHJZZJk2vq038GKIDUMusqgvRjQ0B6OV5uEc,150
 judgeval/tracer/llm/llm_together/wrapper.py,sha256=HFqy_MabQeSq8oj2diZhEuk1SDt_hDfk5MFdPn9MFhg,1733
 judgeval/tracer/processors/__init__.py,sha256=BdOOPOD1RfMI5YHW76DNPKR07EAev-JxoolZ3KaXNNU,7100
@@ -87,7 +88,7 @@ judgeval/utils/file_utils.py,sha256=vq-n5WZEZjVbZ5S9QTkW8nSH6Pvw-Jx0ttsQ1t0wnPQ,
 judgeval/utils/guards.py,sha256=_DaKZxvjD10J97Ze2paHhbCiV2MpDz3FZQmNwaL5k0w,945
 judgeval/utils/meta.py,sha256=RAqZuvOlymqMwFoS0joBW_r65lcN9bY8BpNYHoytKps,773
 judgeval/utils/project.py,sha256=kGpYmp6QGTD6h-GjQ-ovT7kBmGnyb99MWDJmRGFQHOg,527
-judgeval/utils/serialize.py,sha256=QXR-8Nj5rqOrI9zLx0oRLdk6DW6Bc7j8eyF4zQ7PLxA,6256
+judgeval/utils/serialize.py,sha256=WbforbVFGINuk68T2YtWhj-ECMC6rWol3g5dxz9nsm8,6265
 judgeval/utils/testing.py,sha256=m5Nexv65tmfSj1XvAPK5Ear7aJ7w5xjDtZN0tLZ_RBk,2939
 judgeval/utils/url.py,sha256=Shf0v3XcbaWpL0m1eGJEEO_z4TsQCnDB2Rl25OTUmiI,195
 judgeval/utils/version_check.py,sha256=se4Ft8rjcl5u7fHMxSGQpka844V2AcZpOYl6StLWTio,1081
@@ -104,8 +105,8 @@ judgeval/utils/wrappers/mutable_wrap_async.py,sha256=stHISOUCGFUJXY8seXmxUo4ZpMF
 judgeval/utils/wrappers/mutable_wrap_sync.py,sha256=t5jygAQ1vqhy8s1GfiLeYygYgaLTgfoYASN47U5JiPs,2888
 judgeval/utils/wrappers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 judgeval/utils/wrappers/utils.py,sha256=j18vaa6JWDw2s3nQy1z5PfV_9Xxio-bVARaHG_0XyL0,1228
-judgeval-0.20.0.dist-info/METADATA,sha256=dOkZ3SyecH3UHCerW1wwdVpfkzOAjCOkSHtl_D0mGqY,11483
-judgeval-0.20.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-judgeval-0.20.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
-judgeval-0.20.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
-judgeval-0.20.0.dist-info/RECORD,,
+judgeval-0.22.0.dist-info/METADATA,sha256=xPrltYImsdj9e2CAxKmwop-VJCf9ZPXzgKi83qkUm8E,11483
+judgeval-0.22.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+judgeval-0.22.0.dist-info/entry_points.txt,sha256=-eoeD-oDLn4A7MSgeBS9Akwanf3_0r0cgEleBcIOjg0,46
+judgeval-0.22.0.dist-info/licenses/LICENSE.md,sha256=tKmCg7k5QOmxPK19XMfzim04QiQJPmgIm0pAn55IJwk,11352
+judgeval-0.22.0.dist-info/RECORD,,