judgeval 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of judgeval has been flagged as potentially problematic; consult the package registry's advisory page for details.

@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
  import functools
3
- import orjson
4
3
  from typing import (
5
4
  TYPE_CHECKING,
6
5
  Any,
@@ -22,6 +21,7 @@ from judgeval.tracer.llm.llm_openai.config import (
22
21
  openai_AsyncOpenAI,
23
22
  )
24
23
  from judgeval.tracer.managers import sync_span_context, async_span_context
24
+ from judgeval.logger import judgeval_logger
25
25
  from judgeval.tracer.keys import AttributeKeys
26
26
  from judgeval.tracer.utils import set_span_attribute
27
27
  from judgeval.utils.serialize import safe_serialize
@@ -426,7 +426,7 @@ class TracedOpenAIAsyncGenerator:
426
426
  raise
427
427
 
428
428
 
429
- TClient = TypeVar("TClient", bound=OpenAIClient)
429
+ TClient = TypeVar("TClient", bound=Union[OpenAIClient, OpenAIAsyncClient])
430
430
 
431
431
 
432
432
  def wrap_openai_client(tracer: Tracer, client: TClient) -> TClient:
@@ -457,66 +457,78 @@ def wrap_openai_client(tracer: Tracer, client: TClient) -> TClient:
457
457
  with sync_span_context(
458
458
  tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
459
459
  ) as span:
460
- tracer.add_agent_attributes_to_span(span)
461
- set_span_attribute(
462
- span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
463
- )
464
- model_name = kwargs.get("model", "")
465
- set_span_attribute(
466
- span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
467
- )
468
- response = function(*args, **kwargs)
469
-
470
- if isinstance(response, (OpenAIChatCompletionBase, OpenAIResponse)):
471
- output, usage_data = _format_openai_output(response)
472
- # Serialize structured data to JSON for span attribute
473
- if isinstance(output, list):
474
- output_str = orjson.dumps(
475
- output, option=orjson.OPT_INDENT_2
476
- ).decode()
477
- else:
478
- output_str = str(output) if output is not None else None
460
+ try:
461
+ tracer.add_agent_attributes_to_span(span)
479
462
  set_span_attribute(
480
- span, AttributeKeys.GEN_AI_COMPLETION, output_str
463
+ span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
481
464
  )
482
- if usage_data:
483
- (
484
- prompt_tokens,
485
- completion_tokens,
486
- cache_read,
487
- cache_creation,
488
- ) = _extract_openai_tokens(usage_data)
489
- set_span_attribute(
490
- span,
491
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
492
- prompt_tokens,
493
- )
494
- set_span_attribute(
495
- span,
496
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
497
- completion_tokens,
498
- )
499
- set_span_attribute(
500
- span,
501
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
502
- cache_read,
503
- )
465
+ model_name = kwargs.get("model", "")
466
+ set_span_attribute(
467
+ span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
468
+ )
469
+ except Exception as e:
470
+ judgeval_logger.error(
471
+ f"[openai wrapped] Error adding span metadata: {e}"
472
+ )
473
+
474
+ response = function(*args, **kwargs)
475
+
476
+ try:
477
+ if isinstance(
478
+ response, (OpenAIChatCompletionBase, OpenAIResponse)
479
+ ):
480
+ output, usage_data = _format_openai_output(response)
481
+ # Serialize structured data to JSON for span attribute
482
+ if isinstance(output, list):
483
+ output_str = safe_serialize(output)
484
+ else:
485
+ output_str = str(output) if output is not None else None
504
486
  set_span_attribute(
505
- span,
506
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
507
- cache_creation,
487
+ span, AttributeKeys.GEN_AI_COMPLETION, output_str
508
488
  )
489
+ if usage_data:
490
+ (
491
+ prompt_tokens,
492
+ completion_tokens,
493
+ cache_read,
494
+ cache_creation,
495
+ ) = _extract_openai_tokens(usage_data)
496
+ set_span_attribute(
497
+ span,
498
+ AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
499
+ prompt_tokens,
500
+ )
501
+ set_span_attribute(
502
+ span,
503
+ AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
504
+ completion_tokens,
505
+ )
506
+ set_span_attribute(
507
+ span,
508
+ AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
509
+ cache_read,
510
+ )
511
+ set_span_attribute(
512
+ span,
513
+ AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
514
+ cache_creation,
515
+ )
516
+ set_span_attribute(
517
+ span,
518
+ AttributeKeys.JUDGMENT_USAGE_METADATA,
519
+ safe_serialize(usage_data),
520
+ )
509
521
  set_span_attribute(
510
522
  span,
511
- AttributeKeys.JUDGMENT_USAGE_METADATA,
512
- safe_serialize(usage_data),
523
+ AttributeKeys.GEN_AI_RESPONSE_MODEL,
524
+ getattr(response, "model", model_name),
513
525
  )
514
- set_span_attribute(
515
- span,
516
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
517
- getattr(response, "model", model_name),
526
+ except Exception as e:
527
+ judgeval_logger.error(
528
+ f"[openai wrapped] Error adding span metadata: {e}"
518
529
  )
519
- return response
530
+ finally:
531
+ return response
520
532
 
521
533
  return wrapper
522
534
 
@@ -541,66 +553,78 @@ def wrap_openai_client(tracer: Tracer, client: TClient) -> TClient:
541
553
  async with async_span_context(
542
554
  tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
543
555
  ) as span:
544
- tracer.add_agent_attributes_to_span(span)
545
- set_span_attribute(
546
- span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
547
- )
548
- model_name = kwargs.get("model", "")
549
- set_span_attribute(
550
- span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
551
- )
552
- response = await function(*args, **kwargs)
553
-
554
- if isinstance(response, (OpenAIChatCompletionBase, OpenAIResponse)):
555
- output, usage_data = _format_openai_output(response)
556
- # Serialize structured data to JSON for span attribute
557
- if isinstance(output, list):
558
- output_str = orjson.dumps(
559
- output, option=orjson.OPT_INDENT_2
560
- ).decode()
561
- else:
562
- output_str = str(output) if output is not None else None
556
+ try:
557
+ tracer.add_agent_attributes_to_span(span)
563
558
  set_span_attribute(
564
- span, AttributeKeys.GEN_AI_COMPLETION, output_str
559
+ span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
565
560
  )
566
- if usage_data:
567
- (
568
- prompt_tokens,
569
- completion_tokens,
570
- cache_read,
571
- cache_creation,
572
- ) = _extract_openai_tokens(usage_data)
573
- set_span_attribute(
574
- span,
575
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
576
- prompt_tokens,
577
- )
578
- set_span_attribute(
579
- span,
580
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
581
- completion_tokens,
582
- )
583
- set_span_attribute(
584
- span,
585
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
586
- cache_read,
587
- )
561
+ model_name = kwargs.get("model", "")
562
+ set_span_attribute(
563
+ span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
564
+ )
565
+ except Exception as e:
566
+ judgeval_logger.error(
567
+ f"[openai wrapped_async] Error adding span metadata: {e}"
568
+ )
569
+
570
+ response = await function(*args, **kwargs)
571
+
572
+ try:
573
+ if isinstance(
574
+ response, (OpenAIChatCompletionBase, OpenAIResponse)
575
+ ):
576
+ output, usage_data = _format_openai_output(response)
577
+ # Serialize structured data to JSON for span attribute
578
+ if isinstance(output, list):
579
+ output_str = safe_serialize(output)
580
+ else:
581
+ output_str = str(output) if output is not None else None
588
582
  set_span_attribute(
589
- span,
590
- AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
591
- cache_creation,
583
+ span, AttributeKeys.GEN_AI_COMPLETION, output_str
592
584
  )
585
+ if usage_data:
586
+ (
587
+ prompt_tokens,
588
+ completion_tokens,
589
+ cache_read,
590
+ cache_creation,
591
+ ) = _extract_openai_tokens(usage_data)
592
+ set_span_attribute(
593
+ span,
594
+ AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
595
+ prompt_tokens,
596
+ )
597
+ set_span_attribute(
598
+ span,
599
+ AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
600
+ completion_tokens,
601
+ )
602
+ set_span_attribute(
603
+ span,
604
+ AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
605
+ cache_read,
606
+ )
607
+ set_span_attribute(
608
+ span,
609
+ AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
610
+ cache_creation,
611
+ )
612
+ set_span_attribute(
613
+ span,
614
+ AttributeKeys.JUDGMENT_USAGE_METADATA,
615
+ safe_serialize(usage_data),
616
+ )
593
617
  set_span_attribute(
594
618
  span,
595
- AttributeKeys.JUDGMENT_USAGE_METADATA,
596
- safe_serialize(usage_data),
619
+ AttributeKeys.GEN_AI_RESPONSE_MODEL,
620
+ getattr(response, "model", model_name),
597
621
  )
598
- set_span_attribute(
599
- span,
600
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
601
- getattr(response, "model", model_name),
622
+ except Exception as e:
623
+ judgeval_logger.error(
624
+ f"[openai wrapped_async] Error adding span metadata: {e}"
602
625
  )
603
- return response
626
+ finally:
627
+ return response
604
628
 
605
629
  return wrapper
606
630
 
@@ -19,6 +19,7 @@ from judgeval.tracer.llm.llm_together.config import (
19
19
  together_AsyncTogether,
20
20
  )
21
21
  from judgeval.tracer.managers import sync_span_context, async_span_context
22
+ from judgeval.logger import judgeval_logger
22
23
  from judgeval.tracer.keys import AttributeKeys
23
24
  from judgeval.tracer.utils import set_span_attribute
24
25
  from judgeval.utils.serialize import safe_serialize
@@ -296,73 +297,85 @@ def wrap_together_client(
296
297
  with sync_span_context(
297
298
  tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
298
299
  ) as span:
299
- tracer.add_agent_attributes_to_span(span)
300
- set_span_attribute(
301
- span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
302
- )
303
- model_name = kwargs.get("model", "")
304
- # Add together_ai/ prefix for server-side cost calculation
305
- prefixed_model_name = (
306
- f"together_ai/{model_name}" if model_name else ""
307
- )
308
- set_span_attribute(
309
- span, AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
310
- )
300
+ try:
301
+ tracer.add_agent_attributes_to_span(span)
302
+ set_span_attribute(
303
+ span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
304
+ )
305
+ model_name = kwargs.get("model", "")
306
+ # Add together_ai/ prefix for server-side cost calculation
307
+ prefixed_model_name = (
308
+ f"together_ai/{model_name}" if model_name else ""
309
+ )
310
+ set_span_attribute(
311
+ span,
312
+ AttributeKeys.GEN_AI_REQUEST_MODEL,
313
+ prefixed_model_name,
314
+ )
315
+ except Exception as e:
316
+ judgeval_logger.error(
317
+ f"[together wrapped] Error adding span metadata: {e}"
318
+ )
319
+
311
320
  response = function(*args, **kwargs)
312
321
 
313
- if isinstance(response, TogetherChatCompletion):
314
- output, usage_data = _format_together_output(response)
315
- # Serialize structured data to JSON for span attribute
316
- if output:
317
- if isinstance(output, list):
318
- import orjson
319
-
320
- output_str = orjson.dumps(
321
- output, option=orjson.OPT_INDENT_2
322
- ).decode()
323
- else:
324
- output_str = str(output)
325
- set_span_attribute(
326
- span, AttributeKeys.GEN_AI_COMPLETION, output_str
322
+ try:
323
+ if isinstance(response, TogetherChatCompletion):
324
+ output, usage_data = _format_together_output(response)
325
+ # Serialize structured data to JSON for span attribute
326
+ if output:
327
+ if isinstance(output, list):
328
+ output_str = safe_serialize(output)
329
+ else:
330
+ output_str = str(output)
331
+ set_span_attribute(
332
+ span, AttributeKeys.GEN_AI_COMPLETION, output_str
333
+ )
334
+ if usage_data:
335
+ (
336
+ prompt_tokens,
337
+ completion_tokens,
338
+ cache_read,
339
+ cache_creation,
340
+ ) = _extract_together_tokens(usage_data)
341
+ set_span_attribute(
342
+ span,
343
+ AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
344
+ prompt_tokens,
345
+ )
346
+ set_span_attribute(
347
+ span,
348
+ AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
349
+ completion_tokens,
350
+ )
351
+ set_span_attribute(
352
+ span,
353
+ AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
354
+ cache_read,
355
+ )
356
+ set_span_attribute(
357
+ span,
358
+ AttributeKeys.JUDGMENT_USAGE_METADATA,
359
+ safe_serialize(usage_data),
360
+ )
361
+ # Add together_ai/ prefix to response model for server-side cost calculation
362
+ response_model = getattr(response, "model", model_name)
363
+ prefixed_response_model = (
364
+ f"together_ai/{response_model}"
365
+ if response_model
366
+ else ""
327
367
  )
328
- if usage_data:
329
- (
330
- prompt_tokens,
331
- completion_tokens,
332
- cache_read,
333
- cache_creation,
334
- ) = _extract_together_tokens(usage_data)
335
368
  set_span_attribute(
336
369
  span,
337
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
338
- prompt_tokens,
370
+ AttributeKeys.GEN_AI_RESPONSE_MODEL,
371
+ prefixed_response_model,
339
372
  )
340
- set_span_attribute(
341
- span,
342
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
343
- completion_tokens,
344
- )
345
- set_span_attribute(
346
- span,
347
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
348
- cache_read,
349
- )
350
- set_span_attribute(
351
- span,
352
- AttributeKeys.JUDGMENT_USAGE_METADATA,
353
- safe_serialize(usage_data),
354
- )
355
- # Add together_ai/ prefix to response model for server-side cost calculation
356
- response_model = getattr(response, "model", model_name)
357
- prefixed_response_model = (
358
- f"together_ai/{response_model}" if response_model else ""
359
- )
360
- set_span_attribute(
361
- span,
362
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
363
- prefixed_response_model,
373
+ except Exception as e:
374
+ judgeval_logger.error(
375
+ f"[together wrapped] Error adding span metadata: {e}"
364
376
  )
365
- return response
377
+ finally:
378
+ return response
366
379
 
367
380
  return wrapper
368
381
 
@@ -391,73 +404,85 @@ def wrap_together_client(
391
404
  async with async_span_context(
392
405
  tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
393
406
  ) as span:
394
- tracer.add_agent_attributes_to_span(span)
395
- set_span_attribute(
396
- span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
397
- )
398
- model_name = kwargs.get("model", "")
399
- # Add together_ai/ prefix for server-side cost calculation
400
- prefixed_model_name = (
401
- f"together_ai/{model_name}" if model_name else ""
402
- )
403
- set_span_attribute(
404
- span, AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
405
- )
407
+ try:
408
+ tracer.add_agent_attributes_to_span(span)
409
+ set_span_attribute(
410
+ span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
411
+ )
412
+ model_name = kwargs.get("model", "")
413
+ # Add together_ai/ prefix for server-side cost calculation
414
+ prefixed_model_name = (
415
+ f"together_ai/{model_name}" if model_name else ""
416
+ )
417
+ set_span_attribute(
418
+ span,
419
+ AttributeKeys.GEN_AI_REQUEST_MODEL,
420
+ prefixed_model_name,
421
+ )
422
+ except Exception as e:
423
+ judgeval_logger.error(
424
+ f"[together wrapped_async] Error adding span metadata: {e}"
425
+ )
426
+
406
427
  response = await function(*args, **kwargs)
407
428
 
408
- if isinstance(response, TogetherChatCompletion):
409
- output, usage_data = _format_together_output(response)
410
- # Serialize structured data to JSON for span attribute
411
- if output:
412
- if isinstance(output, list):
413
- import orjson
414
-
415
- output_str = orjson.dumps(
416
- output, option=orjson.OPT_INDENT_2
417
- ).decode()
418
- else:
419
- output_str = str(output)
420
- set_span_attribute(
421
- span, AttributeKeys.GEN_AI_COMPLETION, output_str
429
+ try:
430
+ if isinstance(response, TogetherChatCompletion):
431
+ output, usage_data = _format_together_output(response)
432
+ # Serialize structured data to JSON for span attribute
433
+ if output:
434
+ if isinstance(output, list):
435
+ output_str = safe_serialize(output)
436
+ else:
437
+ output_str = str(output)
438
+ set_span_attribute(
439
+ span, AttributeKeys.GEN_AI_COMPLETION, output_str
440
+ )
441
+ if usage_data:
442
+ (
443
+ prompt_tokens,
444
+ completion_tokens,
445
+ cache_read,
446
+ cache_creation,
447
+ ) = _extract_together_tokens(usage_data)
448
+ set_span_attribute(
449
+ span,
450
+ AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
451
+ prompt_tokens,
452
+ )
453
+ set_span_attribute(
454
+ span,
455
+ AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
456
+ completion_tokens,
457
+ )
458
+ set_span_attribute(
459
+ span,
460
+ AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
461
+ cache_read,
462
+ )
463
+ set_span_attribute(
464
+ span,
465
+ AttributeKeys.JUDGMENT_USAGE_METADATA,
466
+ safe_serialize(usage_data),
467
+ )
468
+ # Add together_ai/ prefix to response model for server-side cost calculation
469
+ response_model = getattr(response, "model", model_name)
470
+ prefixed_response_model = (
471
+ f"together_ai/{response_model}"
472
+ if response_model
473
+ else ""
422
474
  )
423
- if usage_data:
424
- (
425
- prompt_tokens,
426
- completion_tokens,
427
- cache_read,
428
- cache_creation,
429
- ) = _extract_together_tokens(usage_data)
430
475
  set_span_attribute(
431
476
  span,
432
- AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
433
- prompt_tokens,
477
+ AttributeKeys.GEN_AI_RESPONSE_MODEL,
478
+ prefixed_response_model,
434
479
  )
435
- set_span_attribute(
436
- span,
437
- AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
438
- completion_tokens,
439
- )
440
- set_span_attribute(
441
- span,
442
- AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
443
- cache_read,
444
- )
445
- set_span_attribute(
446
- span,
447
- AttributeKeys.JUDGMENT_USAGE_METADATA,
448
- safe_serialize(usage_data),
449
- )
450
- # Add together_ai/ prefix to response model for server-side cost calculation
451
- response_model = getattr(response, "model", model_name)
452
- prefixed_response_model = (
453
- f"together_ai/{response_model}" if response_model else ""
454
- )
455
- set_span_attribute(
456
- span,
457
- AttributeKeys.GEN_AI_RESPONSE_MODEL,
458
- prefixed_response_model,
480
+ except Exception as e:
481
+ judgeval_logger.error(
482
+ f"[together wrapped_async] Error adding span metadata: {e}"
459
483
  )
460
- return response
484
+ finally:
485
+ return response
461
486
 
462
487
  return wrapper
463
488
 
judgeval/tracer/utils.py CHANGED
@@ -13,7 +13,7 @@ def set_span_attribute(span: Span, name: str, value: Any):
13
13
 
14
14
 
15
15
  class TraceScorerConfig(BaseModel):
16
- scorer: TraceAPIScorerConfig
16
+ scorer: TraceAPIScorerConfig | None
17
17
  model: Optional[str] = None
18
18
  sampling_rate: float = 1.0
19
19
  run_condition: Optional[Callable[..., bool]] = None
@@ -13,7 +13,7 @@ def dont_throw(func: Callable[..., T]) -> Callable[..., T | None]:
13
13
  try:
14
14
  return func(*args, **kwargs)
15
15
  except Exception as e:
16
- judgeval_logger.debug(
16
+ judgeval_logger.warning(
17
17
  f"An exception was raised in {func.__name__}", exc_info=e
18
18
  )
19
19
  pass
judgeval/version.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = "0.16.5"
1
+ __version__ = "0.16.7"
2
2
 
3
3
 
4
4
  def get_version() -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: judgeval
3
- Version: 0.16.5
3
+ Version: 0.16.7
4
4
  Summary: Judgeval Package
5
5
  Project-URL: Homepage, https://github.com/JudgmentLabs/judgeval
6
6
  Project-URL: Issues, https://github.com/JudgmentLabs/judgeval/issues