PyPI - judgeval - Versions diffs - 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl - Mend

judgeval 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of judgeval might be problematic. Click here for more details.

Files changed (16) hide show

judgeval/__init__.py +7 -2
judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +15 -4
judgeval/tracer/__init__.py +9 -1
judgeval/tracer/llm/llm_anthropic/wrapper.py +160 -130
judgeval/tracer/llm/llm_google/wrapper.py +137 -98
judgeval/tracer/llm/llm_groq/wrapper.py +137 -116
judgeval/tracer/llm/llm_openai/wrapper.py +130 -106
judgeval/tracer/llm/llm_together/wrapper.py +145 -120
judgeval/tracer/utils.py +1 -1
judgeval/utils/decorators/dont_throw.py +1 -1
judgeval/version.py +1 -1
{judgeval-0.16.4.dist-info → judgeval-0.16.6.dist-info}/METADATA +2 -2
{judgeval-0.16.4.dist-info → judgeval-0.16.6.dist-info}/RECORD +16 -16
{judgeval-0.16.4.dist-info → judgeval-0.16.6.dist-info}/WHEEL +0 -0
{judgeval-0.16.4.dist-info → judgeval-0.16.6.dist-info}/entry_points.txt +0 -0
{judgeval-0.16.4.dist-info → judgeval-0.16.6.dist-info}/licenses/LICENSE.md +0 -0

judgeval/tracer/llm/llm_google/wrapper.py CHANGED Viewed

@@ -18,6 +18,7 @@ from judgeval.tracer.llm.llm_google.config import (
     google_genai_AsyncClient,
 )
 from judgeval.tracer.managers import sync_span_context, async_span_context
+from judgeval.logger import judgeval_logger
 from judgeval.tracer.keys import AttributeKeys
 from judgeval.tracer.utils import set_span_attribute
 from judgeval.utils.serialize import safe_serialize
@@ -263,23 +264,10 @@ def wrap_google_client(tracer: Tracer, client: GoogleClientType) -> GoogleClient
         @functools.wraps(function)
         def wrapper(*args, **kwargs):
             if kwargs.get("stream", False):
-                span = tracer.get_tracer().start_span(
-                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                )
-                tracer.add_agent_attributes_to_span(span)
-                set_span_attribute(
-                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                )
-                model_name = kwargs.get("model", "")
-                set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
-                stream_response = function(*args, **kwargs)
-                return TracedGoogleGenerator(
-                    tracer, stream_response, client, span, model_name
-                )
-            else:
-                with sync_span_context(
-                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                ) as span:
+                try:
+                    span = tracer.get_tracer().start_span(
+                        span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                    )
                     tracer.add_agent_attributes_to_span(span)
                     set_span_attribute(
                         span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
@@ -288,46 +276,78 @@ def wrap_google_client(tracer: Tracer, client: GoogleClientType) -> GoogleClient
                     set_span_attribute(
                         span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
                     )
-                    response = function(*args, **kwargs)
-                    if isinstance(response, GoogleGenerateContentResponse):
-                        output, usage_data = _format_google_output(response)
+                except Exception as e:
+                    judgeval_logger.error(
+                        f"[google wrapped] Error adding span metadata: {e}"
+                    )
+                stream_response = function(*args, **kwargs)
+                return TracedGoogleGenerator(
+                    tracer, stream_response, client, span, model_name
+                )
+            else:
+                with sync_span_context(
+                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                ) as span:
+                    try:
+                        tracer.add_agent_attributes_to_span(span)
                         set_span_attribute(
-                            span, AttributeKeys.GEN_AI_COMPLETION, output
+                            span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
                         )
-                        if usage_data:
-                            (
-                                prompt_tokens,
-                                completion_tokens,
-                                cache_read,
-                                cache_creation,
-                            ) = _extract_google_tokens(usage_data)
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
-                                prompt_tokens,
-                            )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                completion_tokens,
-                            )
+                        model_name = kwargs.get("model", "")
+                        set_span_attribute(
+                            span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
+                        )
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[google wrapped] Error adding span metadata: {e}"
+                        )
+                    response = function(*args, **kwargs)
+                    try:
+                        if isinstance(response, GoogleGenerateContentResponse):
+                            output, usage_data = _format_google_output(response)
                             set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                                cache_read,
+                                span, AttributeKeys.GEN_AI_COMPLETION, output
                             )
+                            if usage_data:
+                                (
+                                    prompt_tokens,
+                                    completion_tokens,
+                                    cache_read,
+                                    cache_creation,
+                                ) = _extract_google_tokens(usage_data)
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                    prompt_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                    completion_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                    cache_read,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                    safe_serialize(usage_data),
+                                )
                             set_span_attribute(
                                 span,
-                                AttributeKeys.JUDGMENT_USAGE_METADATA,
-                                safe_serialize(usage_data),
+                                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                                getattr(response, "model_version", model_name),
                             )
-                        set_span_attribute(
-                            span,
-                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
-                            getattr(response, "model_version", model_name),
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[google wrapped] Error adding span metadata: {e}"
                         )
-                    return response
+                    finally:
+                        return response
         return wrapper
@@ -335,23 +355,10 @@ def wrap_google_client(tracer: Tracer, client: GoogleClientType) -> GoogleClient
         @functools.wraps(function)
         async def wrapper(*args, **kwargs):
             if kwargs.get("stream", False):
-                span = tracer.get_tracer().start_span(
-                    span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                )
-                tracer.add_agent_attributes_to_span(span)
-                set_span_attribute(
-                    span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                )
-                model_name = kwargs.get("model", "")
-                set_span_attribute(span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name)
-                stream_response = await function(*args, **kwargs)
-                return TracedGoogleAsyncGenerator(
-                    tracer, stream_response, client, span, model_name
-                )
-            else:
-                async with async_span_context(
-                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
-                ) as span:
+                try:
+                    span = tracer.get_tracer().start_span(
+                        span_name, attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                    )
                     tracer.add_agent_attributes_to_span(span)
                     set_span_attribute(
                         span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
@@ -360,46 +367,78 @@ def wrap_google_client(tracer: Tracer, client: GoogleClientType) -> GoogleClient
                     set_span_attribute(
                         span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
                     )
-                    response = await function(*args, **kwargs)
-                    if isinstance(response, GoogleGenerateContentResponse):
-                        output, usage_data = _format_google_output(response)
+                except Exception as e:
+                    judgeval_logger.error(
+                        f"[google wrapped_async] Error adding span metadata: {e}"
+                    )
+                stream_response = await function(*args, **kwargs)
+                return TracedGoogleAsyncGenerator(
+                    tracer, stream_response, client, span, model_name
+                )
+            else:
+                async with async_span_context(
+                    tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
+                ) as span:
+                    try:
+                        tracer.add_agent_attributes_to_span(span)
                         set_span_attribute(
-                            span, AttributeKeys.GEN_AI_COMPLETION, output
+                            span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
                         )
-                        if usage_data:
-                            (
-                                prompt_tokens,
-                                completion_tokens,
-                                cache_read,
-                                cache_creation,
-                            ) = _extract_google_tokens(usage_data)
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
-                                prompt_tokens,
-                            )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                completion_tokens,
-                            )
+                        model_name = kwargs.get("model", "")
+                        set_span_attribute(
+                            span, AttributeKeys.GEN_AI_REQUEST_MODEL, model_name
+                        )
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[google wrapped_async] Error adding span metadata: {e}"
+                        )
+                    response = await function(*args, **kwargs)
+                    try:
+                        if isinstance(response, GoogleGenerateContentResponse):
+                            output, usage_data = _format_google_output(response)
                             set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                                cache_read,
+                                span, AttributeKeys.GEN_AI_COMPLETION, output
                             )
+                            if usage_data:
+                                (
+                                    prompt_tokens,
+                                    completion_tokens,
+                                    cache_read,
+                                    cache_creation,
+                                ) = _extract_google_tokens(usage_data)
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                    prompt_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                    completion_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                    cache_read,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                    safe_serialize(usage_data),
+                                )
                             set_span_attribute(
                                 span,
-                                AttributeKeys.JUDGMENT_USAGE_METADATA,
-                                safe_serialize(usage_data),
+                                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                                getattr(response, "model_version", model_name),
                             )
-                        set_span_attribute(
-                            span,
-                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
-                            getattr(response, "model_version", model_name),
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[google wrapped_async] Error adding span metadata: {e}"
                         )
-                    return response
+                    finally:
+                        return response
         return wrapper

judgeval/tracer/llm/llm_groq/wrapper.py CHANGED Viewed

@@ -19,6 +19,7 @@ from judgeval.tracer.llm.llm_groq.config import (
     groq_AsyncGroq,
 )
 from judgeval.tracer.managers import sync_span_context, async_span_context
+from judgeval.logger import judgeval_logger
 from judgeval.tracer.keys import AttributeKeys
 from judgeval.tracer.utils import set_span_attribute
 from judgeval.utils.serialize import safe_serialize
@@ -295,71 +296,81 @@ def wrap_groq_client(tracer: Tracer, client: GroqClientType) -> GroqClientType:
                 with sync_span_context(
                     tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
                 ) as span:
-                    tracer.add_agent_attributes_to_span(span)
-                    set_span_attribute(
-                        span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                    )
-                    model_name = kwargs.get("model", "")
-                    # Add groq/ prefix for server-side cost calculation
-                    prefixed_model_name = f"groq/{model_name}" if model_name else ""
-                    set_span_attribute(
-                        span, AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
-                    )
+                    try:
+                        tracer.add_agent_attributes_to_span(span)
+                        set_span_attribute(
+                            span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                        )
+                        model_name = kwargs.get("model", "")
+                        # Add groq/ prefix for server-side cost calculation
+                        prefixed_model_name = f"groq/{model_name}" if model_name else ""
+                        set_span_attribute(
+                            span,
+                            AttributeKeys.GEN_AI_REQUEST_MODEL,
+                            prefixed_model_name,
+                        )
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[groq wrapped] Error adding span metadata: {e}"
+                        )
                     response = function(*args, **kwargs)
-                    if isinstance(response, GroqChatCompletion):
-                        output, usage_data = _format_groq_output(response)
-                        # Serialize structured data to JSON for span attribute
-                        if output:
-                            if isinstance(output, list):
-                                import orjson
-                                output_str = orjson.dumps(
-                                    output, option=orjson.OPT_INDENT_2
-                                ).decode()
-                            else:
-                                output_str = str(output)
-                            set_span_attribute(
-                                span, AttributeKeys.GEN_AI_COMPLETION, output_str
-                            )
-                        if usage_data:
-                            (
-                                prompt_tokens,
-                                completion_tokens,
-                                cache_read,
-                                cache_creation,
-                            ) = _extract_groq_tokens(usage_data)
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
-                                prompt_tokens,
-                            )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                completion_tokens,
-                            )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                                cache_read,
+                    try:
+                        if isinstance(response, GroqChatCompletion):
+                            output, usage_data = _format_groq_output(response)
+                            # Serialize structured data to JSON for span attribute
+                            if output:
+                                if isinstance(output, list):
+                                    output_str = safe_serialize(output)
+                                else:
+                                    output_str = str(output)
+                                set_span_attribute(
+                                    span, AttributeKeys.GEN_AI_COMPLETION, output_str
+                                )
+                            if usage_data:
+                                (
+                                    prompt_tokens,
+                                    completion_tokens,
+                                    cache_read,
+                                    cache_creation,
+                                ) = _extract_groq_tokens(usage_data)
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                    prompt_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                    completion_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                    cache_read,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                    safe_serialize(usage_data),
+                                )
+                            # Add groq/ prefix to response model for server-side cost calculation
+                            response_model = getattr(response, "model", model_name)
+                            prefixed_response_model = (
+                                f"groq/{response_model}" if response_model else ""
                             )
                             set_span_attribute(
                                 span,
-                                AttributeKeys.JUDGMENT_USAGE_METADATA,
-                                safe_serialize(usage_data),
+                                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                                prefixed_response_model,
                             )
-                        # Add groq/ prefix to response model for server-side cost calculation
-                        response_model = getattr(response, "model", model_name)
-                        prefixed_response_model = (
-                            f"groq/{response_model}" if response_model else ""
-                        )
-                        set_span_attribute(
-                            span,
-                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
-                            prefixed_response_model,
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[groq wrapped] Error adding span metadata: {e}"
                         )
-                    return response
+                    finally:
+                        return response
         return wrapper
@@ -388,71 +399,81 @@ def wrap_groq_client(tracer: Tracer, client: GroqClientType) -> GroqClientType:
                 async with async_span_context(
                     tracer, span_name, {AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
                 ) as span:
-                    tracer.add_agent_attributes_to_span(span)
-                    set_span_attribute(
-                        span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
-                    )
-                    model_name = kwargs.get("model", "")
-                    # Add groq/ prefix for server-side cost calculation
-                    prefixed_model_name = f"groq/{model_name}" if model_name else ""
-                    set_span_attribute(
-                        span, AttributeKeys.GEN_AI_REQUEST_MODEL, prefixed_model_name
-                    )
+                    try:
+                        tracer.add_agent_attributes_to_span(span)
+                        set_span_attribute(
+                            span, AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs)
+                        )
+                        model_name = kwargs.get("model", "")
+                        # Add groq/ prefix for server-side cost calculation
+                        prefixed_model_name = f"groq/{model_name}" if model_name else ""
+                        set_span_attribute(
+                            span,
+                            AttributeKeys.GEN_AI_REQUEST_MODEL,
+                            prefixed_model_name,
+                        )
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[groq wrapped_async] Error adding span metadata: {e}"
+                        )
                     response = await function(*args, **kwargs)
-                    if isinstance(response, GroqChatCompletion):
-                        output, usage_data = _format_groq_output(response)
-                        # Serialize structured data to JSON for span attribute
-                        if output:
-                            if isinstance(output, list):
-                                import orjson
-                                output_str = orjson.dumps(
-                                    output, option=orjson.OPT_INDENT_2
-                                ).decode()
-                            else:
-                                output_str = str(output)
-                            set_span_attribute(
-                                span, AttributeKeys.GEN_AI_COMPLETION, output_str
-                            )
-                        if usage_data:
-                            (
-                                prompt_tokens,
-                                completion_tokens,
-                                cache_read,
-                                cache_creation,
-                            ) = _extract_groq_tokens(usage_data)
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
-                                prompt_tokens,
-                            )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                completion_tokens,
-                            )
-                            set_span_attribute(
-                                span,
-                                AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
-                                cache_read,
+                    try:
+                        if isinstance(response, GroqChatCompletion):
+                            output, usage_data = _format_groq_output(response)
+                            # Serialize structured data to JSON for span attribute
+                            if output:
+                                if isinstance(output, list):
+                                    output_str = safe_serialize(output)
+                                else:
+                                    output_str = str(output)
+                                set_span_attribute(
+                                    span, AttributeKeys.GEN_AI_COMPLETION, output_str
+                                )
+                            if usage_data:
+                                (
+                                    prompt_tokens,
+                                    completion_tokens,
+                                    cache_read,
+                                    cache_creation,
+                                ) = _extract_groq_tokens(usage_data)
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_INPUT_TOKENS,
+                                    prompt_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                    completion_tokens,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                                    cache_read,
+                                )
+                                set_span_attribute(
+                                    span,
+                                    AttributeKeys.JUDGMENT_USAGE_METADATA,
+                                    safe_serialize(usage_data),
+                                )
+                            # Add groq/ prefix to response model for server-side cost calculation
+                            response_model = getattr(response, "model", model_name)
+                            prefixed_response_model = (
+                                f"groq/{response_model}" if response_model else ""
                             )
                             set_span_attribute(
                                 span,
-                                AttributeKeys.JUDGMENT_USAGE_METADATA,
-                                safe_serialize(usage_data),
+                                AttributeKeys.GEN_AI_RESPONSE_MODEL,
+                                prefixed_response_model,
                             )
-                        # Add groq/ prefix to response model for server-side cost calculation
-                        response_model = getattr(response, "model", model_name)
-                        prefixed_response_model = (
-                            f"groq/{response_model}" if response_model else ""
-                        )
-                        set_span_attribute(
-                            span,
-                            AttributeKeys.GEN_AI_RESPONSE_MODEL,
-                            prefixed_response_model,
+                    except Exception as e:
+                        judgeval_logger.error(
+                            f"[groq wrapped_async] Error adding span metadata: {e}"
                         )
-                    return response
+                    finally:
+                        return response
         return wrapper

judgeval 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl

Potentially problematic release.

judgeval 0.16.4__py3-none-any.whl → 0.16.6__py3-none-any.whl