judgeval 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- judgeval/__init__.py +2 -0
- judgeval/cli.py +65 -0
- judgeval/clients.py +2 -1
- judgeval/common/api/api.py +46 -54
- judgeval/common/api/constants.py +18 -5
- judgeval/common/api/json_encoder.py +241 -0
- judgeval/common/tracer/core.py +772 -467
- judgeval/common/tracer/otel_span_processor.py +1 -1
- judgeval/common/tracer/providers.py +119 -0
- judgeval/common/tracer/span_processor.py +1 -1
- judgeval/common/tracer/span_transformer.py +16 -26
- judgeval/constants.py +1 -0
- judgeval/data/evaluation_run.py +104 -0
- judgeval/data/judgment_types.py +38 -8
- judgeval/data/trace.py +6 -122
- judgeval/data/trace_run.py +2 -3
- judgeval/dataset.py +2 -0
- judgeval/integrations/langgraph.py +2 -1
- judgeval/judges/litellm_judge.py +2 -1
- judgeval/judges/mixture_of_judges.py +2 -1
- judgeval/judges/utils.py +2 -1
- judgeval/judgment_client.py +113 -53
- judgeval/local_eval_queue.py +190 -0
- judgeval/run_evaluation.py +43 -197
- judgeval/scorers/base_scorer.py +9 -10
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +17 -3
- judgeval/scorers/score.py +33 -11
- judgeval/utils/async_utils.py +36 -0
- {judgeval-0.4.0.dist-info → judgeval-0.6.0.dist-info}/METADATA +11 -12
- {judgeval-0.4.0.dist-info → judgeval-0.6.0.dist-info}/RECORD +33 -27
- judgeval-0.6.0.dist-info/entry_points.txt +2 -0
- judgeval/evaluation_run.py +0 -76
- {judgeval-0.4.0.dist-info → judgeval-0.6.0.dist-info}/WHEEL +0 -0
- {judgeval-0.4.0.dist-info → judgeval-0.6.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/tracer/core.py
CHANGED
@@ -26,11 +26,15 @@ from typing import (
     Generator,
     List,
     Optional,
+    ParamSpec,
     Tuple,
+    TypeVar,
     Union,
     TypeAlias,
+    overload,
 )
 import types
+import random


 from judgeval.common.tracer.constants import _TRACE_FILEPATH_BLOCKLIST
@@ -38,40 +42,33 @@ from judgeval.common.tracer.constants import _TRACE_FILEPATH_BLOCKLIST
 from judgeval.common.tracer.otel_span_processor import JudgmentSpanProcessor
 from judgeval.common.tracer.span_processor import SpanProcessorBase
 from judgeval.common.tracer.trace_manager import TraceManagerClient
-from litellm import cost_per_token as _original_cost_per_token
-from openai import OpenAI, AsyncOpenAI
-from openai.types.chat.chat_completion import ChatCompletion
-from openai.types.responses.response import Response
-from openai.types.chat import ParsedChatCompletion
-from together import Together, AsyncTogether
-from anthropic import Anthropic, AsyncAnthropic
-from google import genai
-from groq import Groq, AsyncGroq

 from judgeval.data import Example, Trace, TraceSpan, TraceUsage
 from judgeval.scorers import APIScorerConfig, BaseScorer
-from judgeval.evaluation_run import EvaluationRun
-from judgeval.
+from judgeval.data.evaluation_run import EvaluationRun
+from judgeval.local_eval_queue import LocalEvaluationQueue
+from judgeval.common.api import JudgmentApiClient
+from judgeval.common.utils import OptExcInfo, validate_api_key
 from judgeval.common.logger import judgeval_logger

+from litellm import cost_per_token as _original_cost_per_token  # type: ignore
+from judgeval.common.tracer.providers import (
+    HAS_OPENAI,
+    HAS_TOGETHER,
+    HAS_ANTHROPIC,
+    HAS_GOOGLE_GENAI,
+    HAS_GROQ,
+    ApiClient,
+)
+from judgeval.constants import DEFAULT_GPT_MODEL
+

 current_trace_var = contextvars.ContextVar[Optional["TraceClient"]](
     "current_trace", default=None
 )
 current_span_var = contextvars.ContextVar[Optional[str]]("current_span", default=None)

-
-    OpenAI,
-    Together,
-    Anthropic,
-    AsyncOpenAI,
-    AsyncAnthropic,
-    AsyncTogether,
-    genai.Client,
-    genai.client.AsyncClient,
-    Groq,
-    AsyncGroq,
-]
+
 SpanType: TypeAlias = str

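The new `judgeval/common/tracer/providers.py` module (+119 lines, not shown in this diff) replaces the hard provider imports above with optional-dependency detection: each SDK is imported once, and a `HAS_*` flag plus provider-prefixed aliases record whether it is available. A minimal sketch of that pattern, assuming a try/except design; only the flag and alias names used by `core.py` are confirmed by this diff, the internals are a guess:

```python
# Sketch of the optional-provider pattern implied by the HAS_* flags and
# the provider-prefixed aliases (openai_OpenAI, groq_Groq, ...) imported
# by core.py. The real providers.py may be organized differently.
try:
    from openai import OpenAI as openai_OpenAI, AsyncOpenAI as openai_AsyncOpenAI

    HAS_OPENAI = True
except ImportError:
    # SDK not installed: keep the names defined so call sites can assert on them.
    openai_OpenAI = None  # type: ignore[assignment]
    openai_AsyncOpenAI = None  # type: ignore[assignment]
    HAS_OPENAI = False

try:
    from groq import Groq as groq_Groq, AsyncGroq as groq_AsyncGroq

    HAS_GROQ = True
except ImportError:
    groq_Groq = None  # type: ignore[assignment]
    groq_AsyncGroq = None  # type: ignore[assignment]
    HAS_GROQ = False
```

Deferring the imports this way means judgeval no longer fails at import time when only a subset of the provider SDKs is installed, which is why every use site in `core.py` below guards the import with `if HAS_...:` followed by an assert.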
@@ -113,10 +110,6 @@ class TraceClient:

         self.otel_span_processor = tracer.otel_span_processor

-        judgeval_logger.info(
-            f"🎯 TraceClient using span processor for trace {self.trace_id}"
-        )
-
     def get_current_span(self):
         """Get the current span from the context var"""
         return self.tracer.get_current_span()
@@ -181,85 +174,53 @@ class TraceClient:

     def async_evaluate(
         self,
-        example:
-        actual_output: Optional[Union[str, List[str]]] = None,
-        expected_output: Optional[Union[str, List[str]]] = None,
-        context: Optional[List[str]] = None,
-        retrieval_context: Optional[List[str]] = None,
-        tools_called: Optional[List[str]] = None,
-        expected_tools: Optional[List[str]] = None,
-        additional_metadata: Optional[Dict[str, Any]] = None,
-        model: Optional[str] = None,
-        span_id: Optional[str] = None,
+        scorer: Union[APIScorerConfig, BaseScorer],
+        example: Example,
+        model: str = DEFAULT_GPT_MODEL,
     ):
-        if not self.enable_evaluations:
-            return
-
         start_time = time.time()
+        span_id = self.get_current_span()
+        eval_run_name = (
+            f"{self.name.capitalize()}-{span_id}-{scorer.score_type.capitalize()}"
+        )
+        hosted_scoring = isinstance(scorer, APIScorerConfig) or (
+            isinstance(scorer, BaseScorer) and scorer.server_hosted
+        )
+        if hosted_scoring:
+            eval_run = EvaluationRun(
+                organization_id=self.tracer.organization_id,
+                project_name=self.project_name,
+                eval_name=eval_run_name,
+                examples=[example],
+                scorers=[scorer],
+                model=model,
+                trace_span_id=span_id,
+            )

-        if not scorers:
-            judgeval_logger.warning("No valid scorers available for evaluation")
-            return
-
-        except Exception as e:
-            judgeval_logger.warning(f"Failed to load scorers: {str(e)}")
-            return
-
-        if example is None:
-            if any(
-                param is not None
-                for param in [
-                    input,
-                    actual_output,
-                    expected_output,
-                    context,
-                    retrieval_context,
-                    tools_called,
-                    expected_tools,
-                    additional_metadata,
-                ]
-            ):
-                example = Example(
-                    input=input,
-                    actual_output=actual_output,
-                    expected_output=expected_output,
-                    context=context,
-                    retrieval_context=retrieval_context,
-                    tools_called=tools_called,
-                    expected_tools=expected_tools,
-                    additional_metadata=additional_metadata,
-                )
-            else:
-                raise ValueError(
-                    "Either 'example' or at least one of the individual parameters (input, actual_output, etc.) must be provided"
-                )
+            self.add_eval_run(eval_run, start_time)

+            if span_id:
+                current_span = self.span_id_to_span.get(span_id)
+                if current_span:
+                    self.otel_span_processor.queue_evaluation_run(
+                        eval_run, span_id=span_id, span_data=current_span
+                    )
+        else:
+            # Handle custom scorers using local evaluation queue
+            eval_run = EvaluationRun(
+                organization_id=self.tracer.organization_id,
+                project_name=self.project_name,
+                eval_name=eval_run_name,
+                examples=[example],
+                scorers=[scorer],
+                model=model,
+                trace_span_id=span_id,
+            )

+            self.add_eval_run(eval_run, start_time)

-        if current_span:
-            self.otel_span_processor.queue_evaluation_run(
-                eval_run, span_id=span_id_to_use, span_data=current_span
-            )
+            # Enqueue the evaluation run to the local evaluation queue
+            self.tracer.local_eval_queue.enqueue(eval_run)

     def add_eval_run(self, eval_run: EvaluationRun, start_time: float):
         current_span_id = eval_run.trace_span_id
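The method now takes a single scorer and a prebuilt `Example` instead of a dozen loose keyword arguments, and routes hosted scoring (`APIScorerConfig`, or a `BaseScorer` with `server_hosted=True`) through the OTel span processor while any other `BaseScorer` goes to the tracer's `LocalEvaluationQueue`. A hedged usage sketch; the scorer class and threshold parameter are illustrative, check the installed `judgeval.scorers` for exact exports:

```python
from judgeval.data import Example
from judgeval.scorers import AnswerRelevancyScorer  # illustrative scorer choice

example = Example(
    input="What is the capital of France?",
    actual_output="Paris is the capital of France.",
)

# Called from inside a traced span: scores this example against the
# current span; the span id is now read from context rather than passed in.
trace_client.async_evaluate(
    scorer=AnswerRelevancyScorer(threshold=0.7),
    example=example,
    model="gpt-4.1",  # omitted -> DEFAULT_GPT_MODEL
)
```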
@@ -290,6 +251,14 @@ class TraceClient:

         self.otel_span_processor.queue_span_update(span, span_state="agent_name")

+    def record_class_name(self, class_name: str):
+        current_span_id = self.get_current_span()
+        if current_span_id:
+            span = self.span_id_to_span[current_span_id]
+            span.class_name = class_name
+
+            self.otel_span_processor.queue_span_update(span, span_state="class_name")
+
     def record_state_before(self, state: dict):
         """Records the agent's state before a tool execution on the current span.

@@ -316,35 +285,13 @@ class TraceClient:

         self.otel_span_processor.queue_span_update(span, span_state="state_after")

-    async def _update_coroutine(self, span: TraceSpan, coroutine: Any, field: str):
-        """Helper method to update the output of a trace entry once the coroutine completes"""
-        try:
-            result = await coroutine
-            setattr(span, field, result)
-
-            if field == "output":
-                self.otel_span_processor.queue_span_update(span, span_state="output")
-
-            return result
-        except Exception as e:
-            setattr(span, field, f"Error: {str(e)}")
-
-            if field == "output":
-                self.otel_span_processor.queue_span_update(span, span_state="output")
-
-            raise
-
     def record_output(self, output: Any):
         current_span_id = self.get_current_span()
         if current_span_id:
             span = self.span_id_to_span[current_span_id]
-            span.output =
-
-            if inspect.iscoroutine(output):
-                asyncio.create_task(self._update_coroutine(span, output, "output"))
+            span.output = output

-
-            self.otel_span_processor.queue_span_update(span, span_state="output")
+            self.otel_span_processor.queue_span_update(span, span_state="output")

             return span
         return None
@@ -517,7 +464,7 @@ class TraceClient:


 def _capture_exception_for_trace(
-    current_trace: Optional[TraceClient], exc_info:
+    current_trace: Optional[TraceClient], exc_info: OptExcInfo
 ):
     if not current_trace:
         return
@@ -681,6 +628,7 @@ class _DeepTracer:

         qual_name = self._get_qual_name(frame)
         instance_name = None
+        class_name = None
         if "self" in frame.f_locals:
             instance = frame.f_locals["self"]
             class_name = instance.__class__.__name__
@@ -754,6 +702,7 @@ class _DeepTracer:
                 parent_span_id=parent_span_id,
                 function=qual_name,
                 agent_name=instance_name,
+                class_name=class_name,
             )
             current_trace.add_span(span)

@@ -841,6 +790,10 @@ class _DeepTracer:
         self._original_threading_trace = None


+T = TypeVar("T", bound=Callable[..., Any])
+P = ParamSpec("P")
+
+
 class Tracer:
     # Tracer.current_trace class variable is currently used in wrap()
     # TODO: Keep track of cross-context state for current trace and current span ID solely through class variables instead of instance variables?
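`T` (bound to `Callable[..., Any]`) and `P` let the decorators below keep the wrapped function's static type: a decorator annotated `T -> T` is signature-preserving for mypy/pyright. A self-contained illustration of the idiom (not judgeval code):

```python
from typing import Callable, ParamSpec, TypeVar

P = ParamSpec("P")
R = TypeVar("R")


def traced(func: Callable[P, R]) -> Callable[P, R]:
    """Identity-typed decorator: callers keep full argument/return types."""

    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
        # ... span bookkeeping would go here ...
        return func(*args, **kwargs)

    return wrapper


@traced
def add(a: int, b: int) -> int:
    return a + b


result = add(1, 2)  # type checkers still see (int, int) -> int
```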
@@ -954,6 +907,15 @@ class Tracer:
             else:
                 self.otel_span_processor = SpanProcessorBase()

+            # Initialize local evaluation queue for custom scorers
+            self.local_eval_queue = LocalEvaluationQueue()
+
+            # Start workers with callback to log results only if monitoring is enabled
+            if enable_evaluations and enable_monitoring:
+                self.local_eval_queue.start_workers(
+                    callback=self._log_eval_results_callback
+                )
+
             atexit.register(self._cleanup_on_exit)
         except Exception as e:
             judgeval_logger.error(
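`LocalEvaluationQueue` lives in the new `judgeval/local_eval_queue.py` (+190 lines, not shown here); this file only exercises four methods: `enqueue()`, `start_workers(callback=...)`, `wait_for_completion()`, and `stop_workers()`. A minimal sketch of a queue with that surface, assuming a daemon-thread worker design; the real implementation (including the callback arguments, which in `core.py` are `(evaluation_run, scoring_results)`) may differ:

```python
import queue
import threading
from typing import Any, Callable, Optional


class SketchEvaluationQueue:
    """Background queue exposing the surface core.py relies on (assumed design)."""

    def __init__(self, num_workers: int = 1) -> None:
        self._tasks: "queue.Queue[Any]" = queue.Queue()
        self._num_workers = num_workers
        self._callback: Optional[Callable[..., None]] = None
        self._stop = threading.Event()

    def enqueue(self, eval_run: Any) -> None:
        self._tasks.put(eval_run)

    def start_workers(self, callback: Optional[Callable[..., None]] = None) -> None:
        self._callback = callback
        for _ in range(self._num_workers):
            threading.Thread(target=self._worker, daemon=True).start()

    def _worker(self) -> None:
        while not self._stop.is_set():
            try:
                run = self._tasks.get(timeout=0.1)
            except queue.Empty:
                continue
            try:
                if self._callback:
                    # The real queue presumably scores the run locally first,
                    # then invokes the callback with the scoring results.
                    self._callback(run)
            finally:
                self._tasks.task_done()

    def wait_for_completion(self) -> bool:
        self._tasks.join()  # blocks until every enqueued run is processed
        return True

    def stop_workers(self) -> None:
        self._stop.set()
```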
@@ -1089,10 +1051,10 @@ class Tracer:
         # Reset the context variable
         self.reset_current_trace(token)

-    def
+    def agent(
         self,
-        identifier: str,
-        track_state: bool = False,
+        identifier: Optional[str] = None,
+        track_state: Optional[bool] = False,
         track_attributes: Optional[List[str]] = None,
         field_mappings: Optional[Dict[str, str]] = None,
     ):
@@ -1130,11 +1092,18 @@ class Tracer:
                 "track_state": track_state,
                 "track_attributes": track_attributes,
                 "field_mappings": field_mappings or {},
+                "class_name": class_name,
             }
             return cls

         return decorator

+    def identify(self, *args, **kwargs):
+        judgeval_logger.warning(
+            "identify() is deprecated and may not be supported in future versions of judgeval. Use the agent() decorator instead."
+        )
+        return self.agent(*args, **kwargs)
+
     def _capture_instance_state(
         self, instance: Any, class_config: Dict[str, Any]
     ) -> Dict[str, Any]:
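`identify()` survives as a thin deprecation shim over the renamed `agent()` decorator. Typical usage, based on the config keys stored above (`identifier`, `track_state`, `field_mappings`); the import path and attribute name are assumptions drawn from judgeval's public API, not from this diff:

```python
from judgeval.common.tracer import Tracer  # path assumed from judgeval's docs

judgment = Tracer(project_name="my_project")


@judgment.agent(identifier="name", track_state=True)
class TravelAgent:
    def __init__(self, name: str) -> None:
        # The attribute named by `identifier` labels this instance's spans.
        self.name = name

    @judgment.observe(span_type="tool")
    def book(self, city: str) -> str:
        return f"booked {city}"


# Old code using @judgment.identify(...) still works, but logs a
# deprecation warning and forwards to agent().
```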
@@ -1189,11 +1158,24 @@ class Tracer:
         else:
             trace_client_instance.record_state_after(state)

+    @overload
+    def observe(
+        self, func: T, *, name: Optional[str] = None, span_type: SpanType = "span"
+    ) -> T: ...
+
+    @overload
+    def observe(
+        self,
+        *,
+        name: Optional[str] = None,
+        span_type: SpanType = "span",
+    ) -> Callable[[T], T]: ...
+
     def observe(
         self,
-        func=None,
+        func: Optional[T] = None,
         *,
-        name=None,
+        name: Optional[str] = None,
         span_type: SpanType = "span",
     ):
         """
@@ -1210,8 +1192,8 @@ class Tracer:
             return func if func else lambda f: f

         if func is None:
-            return lambda
-
+            return lambda func: self.observe(
+                func,
                 name=name,
                 span_type=span_type,
             )
@@ -1220,131 +1202,262 @@ class Tracer:
             original_span_name = name or func.__name__

             # Store custom attributes on the function object
-            func._judgment_span_name = original_span_name
-            func._judgment_span_type = span_type
+            func._judgment_span_name = original_span_name  # type: ignore
+            func._judgment_span_type = span_type  # type: ignore

         except Exception:
             return func

+        def _record_span_data(span, args, kwargs):
+            """Helper function to record inputs, agent info, and state on a span."""
+            # Get class and agent info
+            class_name = None
+            agent_name = None
+            if args and hasattr(args[0], "__class__"):
+                class_name = args[0].__class__.__name__
+                agent_name = get_instance_prefixed_name(
+                    args[0], class_name, self.class_identifiers
+                )
+
+            # Record inputs, agent name, class name
+            inputs = combine_args_kwargs(func, args, kwargs)
+            span.record_input(inputs)
+            if agent_name:
+                span.record_agent_name(agent_name)
+            if class_name and class_name in self.class_identifiers:
+                span.record_class_name(class_name)
+
+            # Capture state before execution
+            self._conditionally_capture_and_record_state(span, args, is_before=True)
+
+            return class_name, agent_name
+
+        def _finalize_span_data(span, result, args):
+            """Helper function to record outputs and final state on a span."""
+            # Record output
+            span.record_output(result)
+
+            # Capture state after execution
+            self._conditionally_capture_and_record_state(span, args, is_before=False)
+
+        def _cleanup_trace(current_trace, trace_token, wrapper_type="function"):
+            """Helper function to handle trace cleanup in finally blocks."""
+            try:
+                trace_id, server_response = current_trace.save(final_save=True)
+
+                complete_trace_data = {
+                    "trace_id": current_trace.trace_id,
+                    "name": current_trace.name,
+                    "project_name": current_trace.project_name,
+                    "created_at": datetime.fromtimestamp(
+                        current_trace.start_time or time.time(),
+                        timezone.utc,
+                    ).isoformat(),
+                    "duration": current_trace.get_duration(),
+                    "trace_spans": [
+                        span.model_dump() for span in current_trace.trace_spans
+                    ],
+                    "evaluation_runs": [
+                        run.model_dump() for run in current_trace.evaluation_runs
+                    ],
+                    "offline_mode": self.offline_mode,
+                    "parent_trace_id": current_trace.parent_trace_id,
+                    "parent_name": current_trace.parent_name,
+                    "customer_id": current_trace.customer_id,
+                    "tags": current_trace.tags,
+                    "metadata": current_trace.metadata,
+                    "update_id": current_trace.update_id,
+                }
+                self.traces.append(complete_trace_data)
+                self.reset_current_trace(trace_token)
+            except Exception as e:
+                judgeval_logger.warning(f"Issue with {wrapper_type} cleanup: {e}")
+
+        def _execute_in_span(
+            current_trace, span_name, span_type, execution_func, args, kwargs
+        ):
+            """Helper function to execute code within a span context."""
+            with current_trace.span(span_name, span_type=span_type) as span:
+                _record_span_data(span, args, kwargs)
+
+                try:
+                    result = execution_func()
+                    _finalize_span_data(span, result, args)
+                    return result
+                except Exception as e:
+                    _capture_exception_for_trace(current_trace, sys.exc_info())
+                    raise e
+
+        async def _execute_in_span_async(
+            current_trace, span_name, span_type, async_execution_func, args, kwargs
+        ):
+            """Helper function to execute async code within a span context."""
+            with current_trace.span(span_name, span_type=span_type) as span:
+                _record_span_data(span, args, kwargs)
+
+                try:
+                    result = await async_execution_func()
+                    _finalize_span_data(span, result, args)
+                    return result
+                except Exception as e:
+                    _capture_exception_for_trace(current_trace, sys.exc_info())
+                    raise e
+
+        def _create_new_trace(self, span_name):
+            """Helper function to create a new trace and set it as current."""
+            trace_id = str(uuid.uuid4())
+            project = self.project_name
+
+            current_trace = TraceClient(
+                self,
+                trace_id,
+                span_name,
+                project_name=project,
+                enable_monitoring=self.enable_monitoring,
+                enable_evaluations=self.enable_evaluations,
+            )
+
+            trace_token = self.set_current_trace(current_trace)
+            return current_trace, trace_token
+
+        def _execute_with_auto_trace_creation(
+            span_name, span_type, execution_func, args, kwargs
+        ):
+            """Helper function that handles automatic trace creation and span execution."""
+            current_trace = self.get_current_trace()
+
+            if not current_trace:
+                current_trace, trace_token = _create_new_trace(self, span_name)
+
+                try:
+                    result = _execute_in_span(
+                        current_trace,
+                        span_name,
+                        span_type,
+                        execution_func,
+                        args,
+                        kwargs,
                     )
+                    return result
+                finally:
+                    # Cleanup the trace we created
+                    _cleanup_trace(current_trace, trace_token, "auto_trace")
+            else:
+                # Use existing trace
+                return _execute_in_span(
+                    current_trace, span_name, span_type, execution_func, args, kwargs
+                )

-
+        async def _execute_with_auto_trace_creation_async(
+            span_name, span_type, async_execution_func, args, kwargs
+        ):
+            """Helper function that handles automatic trace creation and async span execution."""
+            current_trace = self.get_current_trace()

-            project = self.project_name
+            if not current_trace:
+                current_trace, trace_token = _create_new_trace(self, span_name)

+                try:
+                    result = await _execute_in_span_async(
+                        current_trace,
                         span_name,
+                        span_type,
+                        async_execution_func,
+                        args,
+                        kwargs,
                     )
+                    return result
+                finally:
+                    # Cleanup the trace we created
+                    _cleanup_trace(current_trace, trace_token, "async_auto_trace")
+            else:
+                # Use existing trace
+                return await _execute_in_span_async(
+                    current_trace,
+                    span_name,
+                    span_type,
+                    async_execution_func,
+                    args,
+                    kwargs,
+                )

-
+        # Check for generator functions first
+        if inspect.isgeneratorfunction(func):

-            if agent_name:
-                span.record_agent_name(agent_name)
-
-            self._conditionally_capture_and_record_state(
-                span, args, is_before=True
-            )
+            @functools.wraps(func)
+            def generator_wrapper(*args, **kwargs):
+                # Get the generator from the original function
+                generator = func(*args, **kwargs)

-            self._conditionally_capture_and_record_state(
-                span, args, is_before=False
+                # Create wrapper generator that creates spans for each yield
+                def traced_generator():
+                    while True:
+                        try:
+                            # Handle automatic trace creation and span execution
+                            item = _execute_with_auto_trace_creation(
+                                original_span_name,
+                                span_type,
+                                lambda: next(generator),
+                                args,
+                                kwargs,
                             )
+                            yield item
+                        except StopIteration:
+                            break
+
+                return traced_generator()
+
+            return generator_wrapper
+
+        # Check for async generator functions
+        elif inspect.isasyncgenfunction(func):

+            @functools.wraps(func)
+            def async_generator_wrapper(*args, **kwargs):
+                # Get the async generator from the original function
+                async_generator = func(*args, **kwargs)
+
+                # Create wrapper async generator that creates spans for each yield
+                async def traced_async_generator():
+                    while True:
                         try:
-                            "duration": current_trace.get_duration(),
-                            "trace_spans": [
-                                span.model_dump()
-                                for span in current_trace.trace_spans
-                            ],
-                            "offline_mode": self.offline_mode,
-                            "parent_trace_id": current_trace.parent_trace_id,
-                            "parent_name": current_trace.parent_name,
-                        }
-
-                        trace_id, server_response = current_trace.save(
-                            final_save=True
+                            # Handle automatic trace creation and span execution
+                            item = await _execute_with_auto_trace_creation_async(
+                                original_span_name,
+                                span_type,
+                                lambda: async_generator.__anext__(),
+                                args,
+                                kwargs,
                             )
+                            if inspect.iscoroutine(item):
+                                item = await item
+                            yield item
+                        except StopAsyncIteration:
+                            break

-
+                return traced_async_generator()

-            except Exception as e:
-                judgeval_logger.warning(f"Issue with async_wrapper: {e}")
-                pass
-        else:
-            with current_trace.span(span_name, span_type=span_type) as span:
-                inputs = combine_args_kwargs(func, args, kwargs)
-                span.record_input(inputs)
-                if agent_name:
-                    span.record_agent_name(agent_name)
-
-                # Capture state before execution
-                self._conditionally_capture_and_record_state(
-                    span, args, is_before=True
-                )
+            return async_generator_wrapper

-            if self.deep_tracing:
-                with _DeepTracer(self):
-                    result = await func(*args, **kwargs)
-            else:
-                result = await func(*args, **kwargs)
-        except Exception as e:
-            _capture_exception_for_trace(current_trace, sys.exc_info())
-            raise e
-
-        # Capture state after execution
-        self._conditionally_capture_and_record_state(
-            span, args, is_before=False
-        )
+        elif asyncio.iscoroutinefunction(func):

+            @functools.wraps(func)
+            async def async_wrapper(*args, **kwargs):
+                nonlocal original_span_name
+                span_name = original_span_name
+
+                async def async_execution():
+                    if self.deep_tracing:
+                        with _DeepTracer(self):
+                            return await func(*args, **kwargs)
+                    else:
+                        return await func(*args, **kwargs)
+
+                result = await _execute_with_auto_trace_creation_async(
+                    span_name, span_type, async_execution, args, kwargs
+                )
+
+                return result

             return async_wrapper
         else:
@@ -1352,122 +1465,18 @@ class Tracer:
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
             nonlocal original_span_name
-            class_name = None
             span_name = original_span_name
-            agent_name = None
-            if args and hasattr(args[0], "__class__"):
-                class_name = args[0].__class__.__name__
-                agent_name = get_instance_prefixed_name(
-                    args[0], class_name, self.class_identifiers
-                )
-            # Get current trace from context
-            current_trace = self.get_current_trace()
-
-            # If there's no current trace, create a root trace
-            if not current_trace:
-                trace_id = str(uuid.uuid4())
-                project = self.project_name
-
-                # Create a new trace client to serve as the root
-                current_trace = TraceClient(
-                    self,
-                    trace_id,
-                    span_name,
-                    project_name=project,
-                    enable_monitoring=self.enable_monitoring,
-                    enable_evaluations=self.enable_evaluations,
-                )
-
-                trace_token = self.set_current_trace(current_trace)
-
-                try:
-                    with current_trace.span(span_name, span_type=span_type) as span:
-                        # Record inputs
-                        inputs = combine_args_kwargs(func, args, kwargs)
-                        span.record_input(inputs)
-                        if agent_name:
-                            span.record_agent_name(agent_name)
-                        # Capture state before execution
-                        self._conditionally_capture_and_record_state(
-                            span, args, is_before=True
-                        )
-
-                        try:
-                            if self.deep_tracing:
-                                with _DeepTracer(self):
-                                    result = func(*args, **kwargs)
-                            else:
-                                result = func(*args, **kwargs)
-                        except Exception as e:
-                            _capture_exception_for_trace(
-                                current_trace, sys.exc_info()
-                            )
-                            raise e
-
-                        # Capture state after execution
-                        self._conditionally_capture_and_record_state(
-                            span, args, is_before=False
-                        )
-
-                        # Record output
-                        span.record_output(result)
-                        return result
-                finally:
-                    try:
-                        trace_id, server_response = current_trace.save(
-                            final_save=True
-                        )

-                        ).isoformat(),
-                        "duration": current_trace.get_duration(),
-                        "trace_spans": [
-                            span.model_dump()
-                            for span in current_trace.trace_spans
-                        ],
-                        "offline_mode": self.offline_mode,
-                        "parent_trace_id": current_trace.parent_trace_id,
-                        "parent_name": current_trace.parent_name,
-                        }
-                        self.traces.append(complete_trace_data)
-                        self.reset_current_trace(trace_token)
-                    except Exception as e:
-                        judgeval_logger.warning(f"Issue with save: {e}")
-                        pass
-            else:
-                with current_trace.span(span_name, span_type=span_type) as span:
-                    inputs = combine_args_kwargs(func, args, kwargs)
-                    span.record_input(inputs)
-                    if agent_name:
-                        span.record_agent_name(agent_name)
-
-                    # Capture state before execution
-                    self._conditionally_capture_and_record_state(
-                        span, args, is_before=True
-                    )
+            def sync_execution():
+                if self.deep_tracing:
+                    with _DeepTracer(self):
+                        return func(*args, **kwargs)
+                else:
+                    return func(*args, **kwargs)

-                            result = func(*args, **kwargs)
-                        else:
-                            result = func(*args, **kwargs)
-                    except Exception as e:
-                        _capture_exception_for_trace(current_trace, sys.exc_info())
-                        raise e
-
-                    # Capture state after execution
-                    self._conditionally_capture_and_record_state(
-                        span, args, is_before=False
-                    )
-
-                    span.record_output(result)
-                    return result
+            return _execute_with_auto_trace_creation(
+                span_name, span_type, sync_execution, args, kwargs
+            )

         return wrapper

@@ -1532,15 +1541,51 @@ class Tracer:

         return decorate_class if cls is None else decorate_class(cls)

-    def async_evaluate(
+    def async_evaluate(
+        self,
+        scorer: Union[APIScorerConfig, BaseScorer],
+        example: Example,
+        model: str = DEFAULT_GPT_MODEL,
+        sampling_rate: float = 1,
+    ):
         try:
             if not self.enable_monitoring or not self.enable_evaluations:
                 return

+            if not isinstance(scorer, (APIScorerConfig, BaseScorer)):
+                judgeval_logger.warning(
+                    f"Scorer must be an instance of APIScorerConfig or BaseScorer, got {type(scorer)}, skipping evaluation"
+                )
+                return
+
+            if not isinstance(example, Example):
+                judgeval_logger.warning(
+                    f"Example must be an instance of Example, got {type(example)} skipping evaluation"
+                )
+                return
+
+            if sampling_rate < 0:
+                judgeval_logger.warning(
+                    "Cannot set sampling_rate below 0, skipping evaluation"
+                )
+                return
+
+            if sampling_rate > 1:
+                judgeval_logger.warning(
+                    "Cannot set sampling_rate above 1, skipping evaluation"
+                )
+                return
+
+            percentage = random.uniform(0, 1)
+            if percentage > sampling_rate:
+                judgeval_logger.info("Skipping async_evaluate due to sampling rate")
+                return

+            current_trace = self.get_current_trace()
             if current_trace:
-                current_trace.async_evaluate(
+                current_trace.async_evaluate(
+                    scorer=scorer, example=example, model=model
+                )
             else:
                 judgeval_logger.warning(
                     "No trace found (context var or fallback), skipping evaluation"
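`sampling_rate` is bounds-checked to [0, 1] and the evaluation is dropped whenever `random.uniform(0, 1)` exceeds it, so the value is the approximate fraction of calls that actually get scored. For example:

```python
# Score roughly 10% of production calls to bound evaluation cost.
judgment.async_evaluate(
    scorer=my_scorer,  # any APIScorerConfig or BaseScorer instance
    example=example,
    sampling_rate=0.10,
)
# sampling_rate=1 (the default) evaluates every call; 0 effectively evaluates none.
```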
@@ -1613,9 +1658,68 @@ class Tracer:
         self.otel_span_processor.shutdown()
         self.otel_span_processor = SpanProcessorBase()

+    def wait_for_completion(self, timeout: Optional[float] = 30.0) -> bool:
+        """Wait for all evaluations and span processing to complete.
+
+        This method blocks until all queued evaluations are processed and
+        all pending spans are flushed to the server.
+
+        Args:
+            timeout: Maximum time to wait in seconds. Defaults to 30 seconds.
+                None means wait indefinitely.
+
+        Returns:
+            True if all processing completed within the timeout, False otherwise.
+        """
+        try:
+            judgeval_logger.debug(
+                "Waiting for all evaluations and spans to complete..."
+            )
+
+            # Wait for all queued evaluation work to complete
+            eval_completed = self.local_eval_queue.wait_for_completion()
+            if not eval_completed:
+                judgeval_logger.warning(
+                    f"Local evaluation queue did not complete within {timeout} seconds"
+                )
+                return False
+
+            self.flush_background_spans()
+
+            judgeval_logger.debug("All evaluations and spans completed successfully")
+            return True
+
+        except Exception as e:
+            judgeval_logger.warning(f"Error while waiting for completion: {e}")
+            return False
+
+    def _log_eval_results_callback(self, evaluation_run, scoring_results):
+        """Callback to log evaluation results after local processing."""
+        try:
+            if scoring_results and self.enable_evaluations and self.enable_monitoring:
+                # Convert scoring results to the format expected by API client
+                results_dict = [
+                    result.model_dump(warnings=False) for result in scoring_results
+                ]
+                api_client = JudgmentApiClient(self.api_key, self.organization_id)
+                api_client.log_evaluation_results(
+                    results_dict, evaluation_run.model_dump(warnings=False)
+                )
+        except Exception as e:
+            judgeval_logger.warning(f"Failed to log local evaluation results: {e}")
+
     def _cleanup_on_exit(self):
         """Cleanup handler called on application exit to ensure spans are flushed."""
         try:
+            # Wait for all queued evaluation work to complete before stopping
+            completed = self.local_eval_queue.wait_for_completion()
+            if not completed:
+                judgeval_logger.warning(
+                    "Local evaluation queue did not complete within 30 seconds"
+                )
+
+            self.local_eval_queue.stop_workers()
             self.flush_background_spans()
         except Exception as e:
             judgeval_logger.warning(f"Error during tracer cleanup: {e}")
@@ -1697,33 +1801,76 @@ def wrap(

     return wrapper

-    if
-        client.beta.chat.completions, "parse",
-        )
-    elif isinstance(client, (Together)):
-        setattr(client.chat.completions, "create", wrapped(original_create))
-    elif isinstance(client, (AsyncTogether)):
-        setattr(client.chat.completions, "create", wrapped_async(original_create))
-    elif isinstance(client, (Anthropic)):
-        setattr(client.messages, "create", wrapped(original_create))
-    elif isinstance(client, (AsyncAnthropic)):
-        setattr(client.messages, "create", wrapped_async(original_create))
-    elif isinstance(client, (genai.Client)):
-        setattr(client.models, "generate_content", wrapped(original_create))
-    elif isinstance(client, (genai.client.AsyncClient)):
-        setattr(client.models, "generate_content", wrapped_async(original_create))
-    elif isinstance(client, (Groq)):
-        setattr(client.chat.completions, "create", wrapped(original_create))
-    elif isinstance(client, (AsyncGroq)):
-        setattr(client.chat.completions, "create", wrapped_async(original_create))
+    if HAS_OPENAI:
+        from judgeval.common.tracer.providers import openai_OpenAI, openai_AsyncOpenAI
+
+        assert openai_OpenAI is not None, "OpenAI client not found"
+        assert openai_AsyncOpenAI is not None, "OpenAI async client not found"
+        if isinstance(client, (openai_OpenAI)):
+            setattr(client.chat.completions, "create", wrapped(original_create))
+            setattr(client.responses, "create", wrapped(original_responses_create))
+            setattr(client.beta.chat.completions, "parse", wrapped(original_beta_parse))
+        elif isinstance(client, (openai_AsyncOpenAI)):
+            setattr(client.chat.completions, "create", wrapped_async(original_create))
+            setattr(
+                client.responses, "create", wrapped_async(original_responses_create)
+            )
+            setattr(
+                client.beta.chat.completions,
+                "parse",
+                wrapped_async(original_beta_parse),
+            )
+
+    if HAS_TOGETHER:
+        from judgeval.common.tracer.providers import (
+            together_Together,
+            together_AsyncTogether,
+        )
+
+        assert together_Together is not None, "Together client not found"
+        assert together_AsyncTogether is not None, "Together async client not found"
+        if isinstance(client, (together_Together)):
+            setattr(client.chat.completions, "create", wrapped(original_create))
+        elif isinstance(client, (together_AsyncTogether)):
+            setattr(client.chat.completions, "create", wrapped_async(original_create))
+
+    if HAS_ANTHROPIC:
+        from judgeval.common.tracer.providers import (
+            anthropic_Anthropic,
+            anthropic_AsyncAnthropic,
+        )
+
+        assert anthropic_Anthropic is not None, "Anthropic client not found"
+        assert anthropic_AsyncAnthropic is not None, "Anthropic async client not found"
+        if isinstance(client, (anthropic_Anthropic)):
+            setattr(client.messages, "create", wrapped(original_create))
+        elif isinstance(client, (anthropic_AsyncAnthropic)):
+            setattr(client.messages, "create", wrapped_async(original_create))
+
+    if HAS_GOOGLE_GENAI:
+        from judgeval.common.tracer.providers import (
+            google_genai_Client,
+            google_genai_AsyncClient,
+        )
+
+        assert google_genai_Client is not None, "Google GenAI client not found"
+        assert google_genai_AsyncClient is not None, (
+            "Google GenAI async client not found"
+        )
+        if isinstance(client, (google_genai_Client)):
+            setattr(client.models, "generate_content", wrapped(original_create))
+        elif isinstance(client, (google_genai_AsyncClient)):
+            setattr(client.models, "generate_content", wrapped_async(original_create))
+
+    if HAS_GROQ:
+        from judgeval.common.tracer.providers import groq_Groq, groq_AsyncGroq
+
+        assert groq_Groq is not None, "Groq client not found"
+        assert groq_AsyncGroq is not None, "Groq async client not found"
+        if isinstance(client, (groq_Groq)):
+            setattr(client.chat.completions, "create", wrapped(original_create))
+        elif isinstance(client, (groq_AsyncGroq)):
+            setattr(client.chat.completions, "create", wrapped_async(original_create))

     return client

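`wrap()` monkey-patches the client's completion entry points in place and returns the same object; dispatch now runs through the `HAS_*` guards so each branch only executes when that SDK is importable. Typical usage, with import paths assumed from judgeval's public API:

```python
from openai import OpenAI
from judgeval.common.tracer import Tracer, wrap  # paths assumed

judgment = Tracer(project_name="my_project")
# Patches chat.completions.create, responses.create, and
# beta.chat.completions.parse on this specific client instance.
client = wrap(OpenAI())


@judgment.observe(span_type="function")
def ask(question: str) -> str:
    resp = client.chat.completions.create(
        model="gpt-4.1",
        messages=[{"role": "user", "content": question}],
    )
    return resp.choices[0].message.content  # call is recorded as an LLM span
```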
@@ -1749,28 +1896,87 @@ def _get_client_config(
     Raises:
         ValueError: If client type is not supported
     """

+    if HAS_OPENAI:
+        from judgeval.common.tracer.providers import openai_OpenAI, openai_AsyncOpenAI
+
+        assert openai_OpenAI is not None, "OpenAI client not found"
+        assert openai_AsyncOpenAI is not None, "OpenAI async client not found"
+        if isinstance(client, (openai_OpenAI)):
+            return (
+                "OPENAI_API_CALL",
+                client.chat.completions.create,
+                client.responses.create,
+                None,
+                client.beta.chat.completions.parse,
+            )
+        elif isinstance(client, (openai_AsyncOpenAI)):
+            return (
+                "OPENAI_API_CALL",
+                client.chat.completions.create,
+                client.responses.create,
+                None,
+                client.beta.chat.completions.parse,
+            )
+    if HAS_TOGETHER:
+        from judgeval.common.tracer.providers import (
+            together_Together,
+            together_AsyncTogether,
+        )
+
+        assert together_Together is not None, "Together client not found"
+        assert together_AsyncTogether is not None, "Together async client not found"
+        if isinstance(client, (together_Together)):
+            return "TOGETHER_API_CALL", client.chat.completions.create, None, None, None
+        elif isinstance(client, (together_AsyncTogether)):
+            return "TOGETHER_API_CALL", client.chat.completions.create, None, None, None
+    if HAS_ANTHROPIC:
+        from judgeval.common.tracer.providers import (
+            anthropic_Anthropic,
+            anthropic_AsyncAnthropic,
+        )
+
+        assert anthropic_Anthropic is not None, "Anthropic client not found"
+        assert anthropic_AsyncAnthropic is not None, "Anthropic async client not found"
+        if isinstance(client, (anthropic_Anthropic)):
+            return (
+                "ANTHROPIC_API_CALL",
+                client.messages.create,
+                None,
+                client.messages.stream,
+                None,
+            )
+        elif isinstance(client, (anthropic_AsyncAnthropic)):
+            return (
+                "ANTHROPIC_API_CALL",
+                client.messages.create,
+                None,
+                client.messages.stream,
+                None,
+            )
+    if HAS_GOOGLE_GENAI:
+        from judgeval.common.tracer.providers import (
+            google_genai_Client,
+            google_genai_AsyncClient,
+        )
+
+        assert google_genai_Client is not None, "Google GenAI client not found"
+        assert google_genai_AsyncClient is not None, (
+            "Google GenAI async client not found"
+        )
+        if isinstance(client, (google_genai_Client)):
+            return "GOOGLE_API_CALL", client.models.generate_content, None, None, None
+        elif isinstance(client, (google_genai_AsyncClient)):
+            return "GOOGLE_API_CALL", client.models.generate_content, None, None, None
+    if HAS_GROQ:
+        from judgeval.common.tracer.providers import groq_Groq, groq_AsyncGroq
+
+        assert groq_Groq is not None, "Groq client not found"
+        assert groq_AsyncGroq is not None, "Groq async client not found"
+        if isinstance(client, (groq_Groq)):
+            return "GROQ_API_CALL", client.chat.completions.create, None, None, None
+        elif isinstance(client, (groq_AsyncGroq)):
+            return "GROQ_API_CALL", client.chat.completions.create, None, None, None
     raise ValueError(f"Unsupported client type: {type(client)}")

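Every branch returns the same 5-tuple. Reading the populated positions across providers, the layout appears to be `(span_name, create_fn, responses_create_fn, stream_fn, beta_parse_fn)`, with `None` for entry points a provider lacks; this layout is inferred from the returns above and from the `original_*` names unpacked in `wrap()`:

```python
# Inferred tuple layout (positions deduced from the returns above):
(
    span_name,
    original_create,
    original_responses_create,
    original_stream,
    original_beta_parse,
) = _get_client_config(client)
# OpenAI:    ("OPENAI_API_CALL",    create, responses.create, None,   beta.parse)
# Anthropic: ("ANTHROPIC_API_CALL", create, None,             stream, None)
# Groq:      ("GROQ_API_CALL",      create, None,             None,   None)
```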
@@ -1794,73 +2000,173 @@ def _format_output_data(
     model_name = None
     message_content = None

+    if HAS_OPENAI:
+        from judgeval.common.tracer.providers import (
+            openai_OpenAI,
+            openai_AsyncOpenAI,
+            openai_ChatCompletion,
+            openai_Response,
+            openai_ParsedChatCompletion,
+        )
+
+        assert openai_OpenAI is not None, "OpenAI client not found"
+        assert openai_AsyncOpenAI is not None, "OpenAI async client not found"
+        assert openai_ChatCompletion is not None, "OpenAI chat completion not found"
+        assert openai_Response is not None, "OpenAI response not found"
+        assert openai_ParsedChatCompletion is not None, (
+            "OpenAI parsed chat completion not found"
+        )
+
+        if isinstance(client, (openai_OpenAI, openai_AsyncOpenAI)):
+            if isinstance(response, openai_ChatCompletion):
+                model_name = response.model
+                prompt_tokens = response.usage.prompt_tokens if response.usage else 0
+                completion_tokens = (
+                    response.usage.completion_tokens if response.usage else 0
+                )
+                cache_read_input_tokens = (
+                    response.usage.prompt_tokens_details.cached_tokens
+                    if response.usage
+                    and response.usage.prompt_tokens_details
+                    and response.usage.prompt_tokens_details.cached_tokens
+                    else 0
+                )
+
+                if isinstance(response, openai_ParsedChatCompletion):
+                    message_content = response.choices[0].message.parsed
+                else:
+                    message_content = response.choices[0].message.content
+            elif isinstance(response, openai_Response):
+                model_name = response.model
+                prompt_tokens = response.usage.input_tokens if response.usage else 0
+                completion_tokens = (
+                    response.usage.output_tokens if response.usage else 0
+                )
+                cache_read_input_tokens = (
+                    response.usage.input_tokens_details.cached_tokens
+                    if response.usage and response.usage.input_tokens_details
+                    else 0
+                )
+                if hasattr(response.output[0], "content"):
+                    message_content = "".join(
+                        seg.text
+                        for seg in response.output[0].content
+                        if hasattr(seg, "text")
+                    )
+            # Note: LiteLLM seems to use cache_read_input_tokens to calculate the cost for OpenAI
+            return message_content, _create_usage(
+                model_name,
+                prompt_tokens,
+                completion_tokens,
+                cache_read_input_tokens,
+                cache_creation_input_tokens,
             )

+    if HAS_TOGETHER:
+        from judgeval.common.tracer.providers import (
+            together_Together,
+            together_AsyncTogether,
+        )
+
+        assert together_Together is not None, "Together client not found"
+        assert together_AsyncTogether is not None, "Together async client not found"
+
+        if isinstance(client, (together_Together, together_AsyncTogether)):
+            model_name = "together_ai/" + response.model
+            prompt_tokens = response.usage.prompt_tokens
+            completion_tokens = response.usage.completion_tokens
+            message_content = response.choices[0].message.content
+
+            # As of 2025-07-14, Together does not do any input cache token tracking
+            return message_content, _create_usage(
+                model_name,
+                prompt_tokens,
+                completion_tokens,
+                cache_read_input_tokens,
+                cache_creation_input_tokens,
             )

+    if HAS_GOOGLE_GENAI:
+        from judgeval.common.tracer.providers import (
+            google_genai_Client,
+            google_genai_AsyncClient,
+        )
+
+        assert google_genai_Client is not None, "Google GenAI client not found"
+        assert google_genai_AsyncClient is not None, (
+            "Google GenAI async client not found"
+        )
+        if isinstance(client, (google_genai_Client, google_genai_AsyncClient)):
+            model_name = response.model_version
+            prompt_tokens = response.usage_metadata.prompt_token_count
+            completion_tokens = response.usage_metadata.candidates_token_count
+            message_content = response.candidates[0].content.parts[0].text
+
+            if hasattr(response.usage_metadata, "cached_content_token_count"):
+                cache_read_input_tokens = (
+                    response.usage_metadata.cached_content_token_count
+                )
+            return message_content, _create_usage(
+                model_name,
+                prompt_tokens,
+                completion_tokens,
+                cache_read_input_tokens,
+                cache_creation_input_tokens,
+            )
+
+    if HAS_ANTHROPIC:
+        from judgeval.common.tracer.providers import (
+            anthropic_Anthropic,
+            anthropic_AsyncAnthropic,
+        )
+
+        assert anthropic_Anthropic is not None, "Anthropic client not found"
+        assert anthropic_AsyncAnthropic is not None, "Anthropic async client not found"
+        if isinstance(client, (anthropic_Anthropic, anthropic_AsyncAnthropic)):
             model_name = response.model
-            prompt_tokens = response.usage.input_tokens
-            completion_tokens = response.usage.output_tokens
-            cache_read_input_tokens =
+            prompt_tokens = response.usage.input_tokens
+            completion_tokens = response.usage.output_tokens
+            cache_read_input_tokens = response.usage.cache_read_input_tokens
+            cache_creation_input_tokens = response.usage.cache_creation_input_tokens
+            message_content = response.content[0].text
+            return message_content, _create_usage(
+                model_name,
+                prompt_tokens,
+                completion_tokens,
+                cache_read_input_tokens,
+                cache_creation_input_tokens,
             )
-        if hasattr(response.output[0], "content"):
-            message_content = "".join(
-                seg.text
-                for seg in response.output[0].content
-                if hasattr(seg, "text")
-            )

-        cache_read_input_tokens = response.usage.cache_read_input_tokens
-        cache_creation_input_tokens = response.usage.cache_creation_input_tokens
-        message_content = response.content[0].text
-    elif isinstance(client, (Groq, AsyncGroq)):
-        model_name = "groq/" + response.model
-        prompt_tokens = response.usage.prompt_tokens
-        completion_tokens = response.usage.completion_tokens
-        message_content = response.choices[0].message.content
-    else:
-        judgeval_logger.warning(f"Unsupported client type: {type(client)}")
-        return None, None
+    if HAS_GROQ:
+        from judgeval.common.tracer.providers import groq_Groq, groq_AsyncGroq
+
+        assert groq_Groq is not None, "Groq client not found"
+        assert groq_AsyncGroq is not None, "Groq async client not found"
+        if isinstance(client, (groq_Groq, groq_AsyncGroq)):
+            model_name = "groq/" + response.model
+            prompt_tokens = response.usage.prompt_tokens
+            completion_tokens = response.usage.completion_tokens
+            message_content = response.choices[0].message.content
+            return message_content, _create_usage(
+                model_name,
+                prompt_tokens,
+                completion_tokens,
+                cache_read_input_tokens,
+                cache_creation_input_tokens,
+            )
+
+    judgeval_logger.warning(f"Unsupported client type: {type(client)}")
+    return None, None

+
+def _create_usage(
+    model_name: str,
+    prompt_tokens: int,
+    completion_tokens: int,
+    cache_read_input_tokens: int = 0,
+    cache_creation_input_tokens: int = 0,
+) -> TraceUsage:
+    """Helper function to create TraceUsage object with cost calculation."""
     prompt_cost, completion_cost = cost_per_token(
         model=model_name,
         prompt_tokens=prompt_tokens,
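The new `_create_usage` helper centralizes the per-provider copies of this token accounting: it feeds the counts to LiteLLM's `cost_per_token` and packs everything into a `TraceUsage`. `cost_per_token` returns a `(prompt_cost, completion_cost)` pair in USD, so the total is their sum:

```python
from litellm import cost_per_token

# Same call _create_usage makes (cache-token kwargs omitted here).
prompt_cost, completion_cost = cost_per_token(
    model="gpt-4.1",
    prompt_tokens=1_200,
    completion_tokens=300,
)
print(f"total: ${prompt_cost + completion_cost:.6f}")
```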
@@ -1871,7 +2177,7 @@ def _format_output_data(
     total_cost_usd = (
         (prompt_cost + completion_cost) if prompt_cost and completion_cost else None
     )
-
+    return TraceUsage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
         total_tokens=prompt_tokens + completion_tokens,
@@ -1882,7 +2188,6 @@ def _format_output_data(
         total_cost_usd=total_cost_usd,
         model_name=model_name,
     )
-    return message_content, usage


 def combine_args_kwargs(func, args, kwargs):
@@ -1940,13 +2245,13 @@ def get_instance_prefixed_name(instance, class_name, class_identifiers):
     """
     if class_name in class_identifiers:
         class_config = class_identifiers[class_name]
-        attr = class_config
+        attr = class_config.get("identifier")
+        if attr:
+            if hasattr(instance, attr) and not callable(getattr(instance, attr)):
+                instance_name = getattr(instance, attr)
+                return instance_name
+            else:
+                raise Exception(
+                    f"Attribute {attr} does not exist for {class_name}. Check your agent() decorator."
+                )
+    return None