judgeval 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/cli.py +65 -0
- judgeval/common/api/api.py +44 -38
- judgeval/common/api/constants.py +18 -5
- judgeval/common/api/json_encoder.py +8 -9
- judgeval/common/tracer/core.py +448 -256
- judgeval/common/tracer/otel_span_processor.py +1 -1
- judgeval/common/tracer/span_processor.py +1 -1
- judgeval/common/tracer/span_transformer.py +2 -1
- judgeval/common/tracer/trace_manager.py +6 -1
- judgeval/common/trainer/__init__.py +5 -0
- judgeval/common/trainer/config.py +125 -0
- judgeval/common/trainer/console.py +151 -0
- judgeval/common/trainer/trainable_model.py +238 -0
- judgeval/common/trainer/trainer.py +301 -0
- judgeval/data/evaluation_run.py +104 -0
- judgeval/data/judgment_types.py +37 -8
- judgeval/data/trace.py +1 -0
- judgeval/data/trace_run.py +0 -2
- judgeval/integrations/langgraph.py +2 -1
- judgeval/judgment_client.py +90 -135
- judgeval/local_eval_queue.py +3 -5
- judgeval/run_evaluation.py +43 -299
- judgeval/scorers/base_scorer.py +9 -10
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +17 -3
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/METADATA +10 -47
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/RECORD +29 -22
- judgeval-0.7.0.dist-info/entry_points.txt +2 -0
- judgeval/evaluation_run.py +0 -80
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/WHEEL +0 -0
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/common/tracer/core.py
CHANGED
@@ -45,7 +45,7 @@ from judgeval.common.tracer.trace_manager import TraceManagerClient
 
 from judgeval.data import Example, Trace, TraceSpan, TraceUsage
 from judgeval.scorers import APIScorerConfig, BaseScorer
-from judgeval.evaluation_run import EvaluationRun
+from judgeval.data.evaluation_run import EvaluationRun
 from judgeval.local_eval_queue import LocalEvaluationQueue
 from judgeval.common.api import JudgmentApiClient
 from judgeval.common.utils import OptExcInfo, validate_api_key
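EvaluationRun moved from the removed top-level module judgeval/evaluation_run.py to judgeval/data/evaluation_run.py (see the file list above), so code importing it from the old location needs the new path. A minimal migration sketch, assuming the class itself is otherwise unchanged for callers:

```python
# judgeval 0.5.0 (module removed in 0.7.0)
# from judgeval.evaluation_run import EvaluationRun

# judgeval 0.7.0
from judgeval.data.evaluation_run import EvaluationRun
```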
@@ -183,8 +183,10 @@ class TraceClient:
         eval_run_name = (
             f"{self.name.capitalize()}-{span_id}-{scorer.score_type.capitalize()}"
         )
-
-
+        hosted_scoring = isinstance(scorer, APIScorerConfig) or (
+            isinstance(scorer, BaseScorer) and scorer.server_hosted
+        )
+        if hosted_scoring:
             eval_run = EvaluationRun(
                 organization_id=self.tracer.organization_id,
                 project_name=self.project_name,
@@ -203,7 +205,7 @@ class TraceClient:
             self.otel_span_processor.queue_evaluation_run(
                 eval_run, span_id=span_id, span_data=current_span
             )
-
+        else:
             # Handle custom scorers using local evaluation queue
             eval_run = EvaluationRun(
                 organization_id=self.tracer.organization_id,
@@ -212,9 +214,7 @@ class TraceClient:
                 examples=[example],
                 scorers=[scorer],
                 model=model,
-                judgment_api_key=self.tracer.api_key,
                 trace_span_id=span_id,
-                trace_id=self.trace_id,
             )
 
             self.add_eval_run(eval_run, start_time)
@@ -251,6 +251,14 @@ class TraceClient:
 
         self.otel_span_processor.queue_span_update(span, span_state="agent_name")
 
+    def record_class_name(self, class_name: str):
+        current_span_id = self.get_current_span()
+        if current_span_id:
+            span = self.span_id_to_span[current_span_id]
+            span.class_name = class_name
+
+            self.otel_span_processor.queue_span_update(span, span_state="class_name")
+
     def record_state_before(self, state: dict):
         """Records the agent's state before a tool execution on the current span.
 
@@ -277,35 +285,13 @@ class TraceClient:
 
         self.otel_span_processor.queue_span_update(span, span_state="state_after")
 
-    async def _update_coroutine(self, span: TraceSpan, coroutine: Any, field: str):
-        """Helper method to update the output of a trace entry once the coroutine completes"""
-        try:
-            result = await coroutine
-            setattr(span, field, result)
-
-            if field == "output":
-                self.otel_span_processor.queue_span_update(span, span_state="output")
-
-            return result
-        except Exception as e:
-            setattr(span, field, f"Error: {str(e)}")
-
-            if field == "output":
-                self.otel_span_processor.queue_span_update(span, span_state="output")
-
-            raise
-
     def record_output(self, output: Any):
         current_span_id = self.get_current_span()
         if current_span_id:
             span = self.span_id_to_span[current_span_id]
-            span.output =
-
-            if inspect.iscoroutine(output):
-                asyncio.create_task(self._update_coroutine(span, output, "output"))
+            span.output = output
 
-
-            self.otel_span_processor.queue_span_update(span, span_state="output")
+            self.otel_span_processor.queue_span_update(span, span_state="output")
 
             return span
         return None
@@ -642,6 +628,7 @@ class _DeepTracer:
 
         qual_name = self._get_qual_name(frame)
         instance_name = None
+        class_name = None
        if "self" in frame.f_locals:
             instance = frame.f_locals["self"]
             class_name = instance.__class__.__name__
@@ -715,6 +702,7 @@ class _DeepTracer:
                 parent_span_id=parent_span_id,
                 function=qual_name,
                 agent_name=instance_name,
+                class_name=class_name,
             )
             current_trace.add_span(span)
 
@@ -827,6 +815,8 @@ class Tracer:
         == "true",
         enable_evaluations: bool = os.getenv("JUDGMENT_EVALUATIONS", "true").lower()
         == "true",
+        show_trace_urls: bool = os.getenv("JUDGMENT_SHOW_TRACE_URLS", "true").lower()
+        == "true",
         # S3 configuration
         use_s3: bool = False,
         s3_bucket_name: Optional[str] = None,
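The new show_trace_urls constructor flag mirrors the existing enable_monitoring/enable_evaluations toggles: it defaults from the JUDGMENT_SHOW_TRACE_URLS environment variable and is stored on the tracer instance (next hunk). A hedged sketch of how it could be set; the import path is inferred from this package layout and the remaining Tracer configuration (API key, project) is assumed to come from the environment:

```python
import os

from judgeval.common.tracer import Tracer  # import path assumed from the file list

# Either via the environment, read at construction time per the diff above...
os.environ["JUDGMENT_SHOW_TRACE_URLS"] = "false"

# ...or explicitly as a keyword argument.
tracer = Tracer(show_trace_urls=False)
```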
@@ -871,6 +861,7 @@ class Tracer:
         self.traces: List[Trace] = []
         self.enable_monitoring: bool = enable_monitoring
         self.enable_evaluations: bool = enable_evaluations
+        self.show_trace_urls: bool = show_trace_urls
         self.class_identifiers: Dict[
             str, str
         ] = {}  # Dictionary to store class identifiers
@@ -1063,10 +1054,10 @@ class Tracer:
             # Reset the context variable
             self.reset_current_trace(token)
 
-    def identify(
+    def agent(
         self,
-        identifier: str,
-        track_state: bool = False,
+        identifier: Optional[str] = None,
+        track_state: Optional[bool] = False,
         track_attributes: Optional[List[str]] = None,
         field_mappings: Optional[Dict[str, str]] = None,
     ):
@@ -1104,11 +1095,18 @@ class Tracer:
                 "track_state": track_state,
                 "track_attributes": track_attributes,
                 "field_mappings": field_mappings or {},
+                "class_name": class_name,
             }
             return cls
 
         return decorator
 
+    def identify(self, *args, **kwargs):
+        judgeval_logger.warning(
+            "identify() is deprecated and may not be supported in future versions of judgeval. Use the agent() decorator instead."
+        )
+        return self.agent(*args, **kwargs)
+
     def _capture_instance_state(
         self, instance: Any, class_config: Dict[str, Any]
     ) -> Dict[str, Any]:
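Because identify() now just forwards to agent() with a deprecation warning, existing decorated agent classes keep working. A hedged migration sketch; the class, attribute, and project names are illustrative, and the tracer setup is assumed to be configured through the environment:

```python
from judgeval.common.tracer import Tracer  # import path assumed from the file list

judgment = Tracer(project_name="demo-agents")  # API key assumed to come from env vars

# 0.5.0 style: still accepted in 0.7.0, but logs the deprecation warning above.
@judgment.identify(identifier="name", track_state=True)
class LegacyAgent:
    def __init__(self, name: str):
        self.name = name

# 0.7.0 style: same arguments, and identifier is now optional.
@judgment.agent(identifier="name", track_state=True)
class SupportAgent:
    def __init__(self, name: str):
        self.name = name
```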
@@ -1213,125 +1211,256 @@ class Tracer:
         except Exception:
             return func
 
-
+        def _record_span_data(span, args, kwargs):
+            """Helper function to record inputs, agent info, and state on a span."""
+            # Get class and agent info
+            class_name = None
+            agent_name = None
+            if args and hasattr(args[0], "__class__"):
+                class_name = args[0].__class__.__name__
+                agent_name = get_instance_prefixed_name(
+                    args[0], class_name, self.class_identifiers
+                )
 
-
-
-
-
-
-
+            # Record inputs, agent name, class name
+            inputs = combine_args_kwargs(func, args, kwargs)
+            span.record_input(inputs)
+            if agent_name:
+                span.record_agent_name(agent_name)
+            if class_name and class_name in self.class_identifiers:
+                span.record_class_name(class_name)
+
+            # Capture state before execution
+            self._conditionally_capture_and_record_state(span, args, is_before=True)
+
+            return class_name, agent_name
+
+        def _finalize_span_data(span, result, args):
+            """Helper function to record outputs and final state on a span."""
+            # Record output
+            span.record_output(result)
+
+            # Capture state after execution
+            self._conditionally_capture_and_record_state(span, args, is_before=False)
+
+        def _cleanup_trace(current_trace, trace_token, wrapper_type="function"):
+            """Helper function to handle trace cleanup in finally blocks."""
+            try:
+                trace_id, server_response = current_trace.save(final_save=True)
+
+                complete_trace_data = {
+                    "trace_id": current_trace.trace_id,
+                    "name": current_trace.name,
+                    "project_name": current_trace.project_name,
+                    "created_at": datetime.fromtimestamp(
+                        current_trace.start_time or time.time(),
+                        timezone.utc,
+                    ).isoformat(),
+                    "duration": current_trace.get_duration(),
+                    "trace_spans": [
+                        span.model_dump() for span in current_trace.trace_spans
+                    ],
+                    "evaluation_runs": [
+                        run.model_dump() for run in current_trace.evaluation_runs
+                    ],
+                    "offline_mode": self.offline_mode,
+                    "parent_trace_id": current_trace.parent_trace_id,
+                    "parent_name": current_trace.parent_name,
+                    "customer_id": current_trace.customer_id,
+                    "tags": current_trace.tags,
+                    "metadata": current_trace.metadata,
+                    "update_id": current_trace.update_id,
+                }
+                self.traces.append(complete_trace_data)
+                self.reset_current_trace(trace_token)
+            except Exception as e:
+                judgeval_logger.warning(f"Issue with {wrapper_type} cleanup: {e}")
+
+        def _execute_in_span(
+            current_trace, span_name, span_type, execution_func, args, kwargs
+        ):
+            """Helper function to execute code within a span context."""
+            with current_trace.span(span_name, span_type=span_type) as span:
+                _record_span_data(span, args, kwargs)
+
+                try:
+                    result = execution_func()
+                    _finalize_span_data(span, result, args)
+                    return result
+                except Exception as e:
+                    _capture_exception_for_trace(current_trace, sys.exc_info())
+                    raise e
+
+        async def _execute_in_span_async(
+            current_trace, span_name, span_type, async_execution_func, args, kwargs
+        ):
+            """Helper function to execute async code within a span context."""
+            with current_trace.span(span_name, span_type=span_type) as span:
+                _record_span_data(span, args, kwargs)
+
+                try:
+                    result = await async_execution_func()
+                    _finalize_span_data(span, result, args)
+                    return result
+                except Exception as e:
+                    _capture_exception_for_trace(current_trace, sys.exc_info())
+                    raise e
+
+        def _create_new_trace(self, span_name):
+            """Helper function to create a new trace and set it as current."""
+            trace_id = str(uuid.uuid4())
+            project = self.project_name
+
+            current_trace = TraceClient(
+                self,
+                trace_id,
+                span_name,
+                project_name=project,
+                enable_monitoring=self.enable_monitoring,
+                enable_evaluations=self.enable_evaluations,
+            )
+
+            trace_token = self.set_current_trace(current_trace)
+            return current_trace, trace_token
+
+        def _execute_with_auto_trace_creation(
+            span_name, span_type, execution_func, args, kwargs
+        ):
+            """Helper function that handles automatic trace creation and span execution."""
+            current_trace = self.get_current_trace()
+
+            if not current_trace:
+                current_trace, trace_token = _create_new_trace(self, span_name)
 
-
-
-
-
+                try:
+                    result = _execute_in_span(
+                        current_trace,
+                        span_name,
+                        span_type,
+                        execution_func,
+                        args,
+                        kwargs,
                     )
+                    return result
+                finally:
+                    # Cleanup the trace we created
+                    _cleanup_trace(current_trace, trace_token, "auto_trace")
+            else:
+                # Use existing trace
+                return _execute_in_span(
+                    current_trace, span_name, span_type, execution_func, args, kwargs
+                )
 
-
+        async def _execute_with_auto_trace_creation_async(
+            span_name, span_type, async_execution_func, args, kwargs
+        ):
+            """Helper function that handles automatic trace creation and async span execution."""
+            current_trace = self.get_current_trace()
 
-
-
-                    project = self.project_name
+            if not current_trace:
+                current_trace, trace_token = _create_new_trace(self, span_name)
 
-
-
-
+                try:
+                    result = await _execute_in_span_async(
+                        current_trace,
                        span_name,
-
-
-
+                        span_type,
+                        async_execution_func,
+                        args,
+                        kwargs,
                     )
+                    return result
+                finally:
+                    # Cleanup the trace we created
+                    _cleanup_trace(current_trace, trace_token, "async_auto_trace")
+            else:
+                # Use existing trace
+                return await _execute_in_span_async(
+                    current_trace,
+                    span_name,
+                    span_type,
+                    async_execution_func,
+                    args,
+                    kwargs,
+                )
 
-
+        # Check for generator functions first
+        if inspect.isgeneratorfunction(func):
 
-
-
-
-
-
-
-
-
-
+            @functools.wraps(func)
+            def generator_wrapper(*args, **kwargs):
+                # Get the generator from the original function
+                generator = func(*args, **kwargs)
+
+                # Create wrapper generator that creates spans for each yield
+                def traced_generator():
+                    while True:
+                        try:
+                            # Handle automatic trace creation and span execution
+                            item = _execute_with_auto_trace_creation(
+                                original_span_name,
+                                span_type,
+                                lambda: next(generator),
+                                args,
+                                kwargs,
                            )
+                            yield item
+                        except StopIteration:
+                            break
 
-
-                                if self.deep_tracing:
-                                    with _DeepTracer(self):
-                                        result = await func(*args, **kwargs)
-                                else:
-                                    result = await func(*args, **kwargs)
-                            except Exception as e:
-                                _capture_exception_for_trace(
-                                    current_trace, sys.exc_info()
-                                )
-                                raise e
+                return traced_generator()
 
-
-
-
+            return generator_wrapper
+
+        # Check for async generator functions
+        elif inspect.isasyncgenfunction(func):
 
-
-
-
+            @functools.wraps(func)
+            def async_generator_wrapper(*args, **kwargs):
+                # Get the async generator from the original function
+                async_generator = func(*args, **kwargs)
+
+                # Create wrapper async generator that creates spans for each yield
+                async def traced_async_generator():
+                    while True:
                        try:
-
-
-
-
-
-
-
-                                "duration": current_trace.get_duration(),
-                                "trace_spans": [
-                                    span.model_dump()
-                                    for span in current_trace.trace_spans
-                                ],
-                                "offline_mode": self.offline_mode,
-                                "parent_trace_id": current_trace.parent_trace_id,
-                                "parent_name": current_trace.parent_name,
-                            }
-
-                            trace_id, server_response = current_trace.save(
-                                final_save=True
+                            # Handle automatic trace creation and span execution
+                            item = await _execute_with_auto_trace_creation_async(
+                                original_span_name,
+                                span_type,
+                                lambda: async_generator.__anext__(),
+                                args,
+                                kwargs,
                            )
+                            if inspect.iscoroutine(item):
+                                item = await item
+                            yield item
+                        except StopAsyncIteration:
+                            break
 
-
+                return traced_async_generator()
 
-
-            except Exception as e:
-                judgeval_logger.warning(f"Issue with async_wrapper: {e}")
-                pass
-            else:
-                with current_trace.span(span_name, span_type=span_type) as span:
-                    inputs = combine_args_kwargs(func, args, kwargs)
-                    span.record_input(inputs)
-                    if agent_name:
-                        span.record_agent_name(agent_name)
-
-                    # Capture state before execution
-                    self._conditionally_capture_and_record_state(
-                        span, args, is_before=True
-                    )
+            return async_generator_wrapper
 
-
-                        if self.deep_tracing:
-                            with _DeepTracer(self):
-                                result = await func(*args, **kwargs)
-                        else:
-                            result = await func(*args, **kwargs)
-                    except Exception as e:
-                        _capture_exception_for_trace(current_trace, sys.exc_info())
-                        raise e
-
-                    # Capture state after execution
-                    self._conditionally_capture_and_record_state(
-                        span, args, is_before=False
-                    )
+        elif asyncio.iscoroutinefunction(func):
 
-
-
+            @functools.wraps(func)
+            async def async_wrapper(*args, **kwargs):
+                nonlocal original_span_name
+                span_name = original_span_name
+
+                async def async_execution():
+                    if self.deep_tracing:
+                        with _DeepTracer(self):
+                            return await func(*args, **kwargs)
+                    else:
+                        return await func(*args, **kwargs)
+
+                result = await _execute_with_auto_trace_creation_async(
+                    span_name, span_type, async_execution, args, kwargs
+                )
+
+                return result
 
             return async_wrapper
         else:
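The net effect of this refactor is that the tracing decorator (exposed as observe() in judgeval's public API) now dispatches on generator, async-generator, coroutine, and plain functions, routing every call through the shared span and trace helpers above and wrapping each yield of a generator in its own span. A hedged sketch of the generator case; the decorator name, its arguments, and the tracer setup are assumed from judgeval's documented usage:

```python
from judgeval.common.tracer import Tracer  # import path assumed from the file list

judgment = Tracer(project_name="demo")  # credentials assumed to come from the environment

@judgment.observe(span_type="tool")
def stream_chunks(text: str):
    # Per generator_wrapper above, each next() on this generator runs inside
    # _execute_with_auto_trace_creation, i.e. one span per yielded item.
    for word in text.split():
        yield word

for chunk in stream_chunks("hello traced world"):
    print(chunk)
```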
@@ -1339,122 +1468,18 @@ class Tracer:
             @functools.wraps(func)
             def wrapper(*args, **kwargs):
                 nonlocal original_span_name
-                class_name = None
                 span_name = original_span_name
-                agent_name = None
-                if args and hasattr(args[0], "__class__"):
-                    class_name = args[0].__class__.__name__
-                    agent_name = get_instance_prefixed_name(
-                        args[0], class_name, self.class_identifiers
-                    )
-                # Get current trace from context
-                current_trace = self.get_current_trace()
-
-                # If there's no current trace, create a root trace
-                if not current_trace:
-                    trace_id = str(uuid.uuid4())
-                    project = self.project_name
-
-                    # Create a new trace client to serve as the root
-                    current_trace = TraceClient(
-                        self,
-                        trace_id,
-                        span_name,
-                        project_name=project,
-                        enable_monitoring=self.enable_monitoring,
-                        enable_evaluations=self.enable_evaluations,
-                    )
-
-                    trace_token = self.set_current_trace(current_trace)
-
-                    try:
-                        with current_trace.span(span_name, span_type=span_type) as span:
-                            # Record inputs
-                            inputs = combine_args_kwargs(func, args, kwargs)
-                            span.record_input(inputs)
-                            if agent_name:
-                                span.record_agent_name(agent_name)
-                            # Capture state before execution
-                            self._conditionally_capture_and_record_state(
-                                span, args, is_before=True
-                            )
-
-                            try:
-                                if self.deep_tracing:
-                                    with _DeepTracer(self):
-                                        result = func(*args, **kwargs)
-                                else:
-                                    result = func(*args, **kwargs)
-                            except Exception as e:
-                                _capture_exception_for_trace(
-                                    current_trace, sys.exc_info()
-                                )
-                                raise e
 
-
-
-
-                            )
-
-
-                            span.record_output(result)
-                            return result
-                    finally:
-                        try:
-                            trace_id, server_response = current_trace.save(
-                                final_save=True
-                            )
+                def sync_execution():
+                    if self.deep_tracing:
+                        with _DeepTracer(self):
+                            return func(*args, **kwargs)
+                    else:
+                        return func(*args, **kwargs)
 
-
-
-
-                                "created_at": datetime.fromtimestamp(
-                                    current_trace.start_time or time.time(),
-                                    timezone.utc,
-                                ).isoformat(),
-                                "duration": current_trace.get_duration(),
-                                "trace_spans": [
-                                    span.model_dump()
-                                    for span in current_trace.trace_spans
-                                ],
-                                "offline_mode": self.offline_mode,
-                                "parent_trace_id": current_trace.parent_trace_id,
-                                "parent_name": current_trace.parent_name,
-                            }
-                            self.traces.append(complete_trace_data)
-                            self.reset_current_trace(trace_token)
-                        except Exception as e:
-                            judgeval_logger.warning(f"Issue with save: {e}")
-                            pass
-                else:
-                    with current_trace.span(span_name, span_type=span_type) as span:
-                        inputs = combine_args_kwargs(func, args, kwargs)
-                        span.record_input(inputs)
-                        if agent_name:
-                            span.record_agent_name(agent_name)
-
-                        # Capture state before execution
-                        self._conditionally_capture_and_record_state(
-                            span, args, is_before=True
-                        )
-
-                        try:
-                            if self.deep_tracing:
-                                with _DeepTracer(self):
-                                    result = func(*args, **kwargs)
-                            else:
-                                result = func(*args, **kwargs)
-                        except Exception as e:
-                            _capture_exception_for_trace(current_trace, sys.exc_info())
-                            raise e
-
-                        # Capture state after execution
-                        self._conditionally_capture_and_record_state(
-                            span, args, is_before=False
-                        )
-
-                        span.record_output(result)
-                        return result
+                return _execute_with_auto_trace_creation(
+                    span_name, span_type, sync_execution, args, kwargs
+                )
 
             return wrapper
 
@@ -1709,6 +1734,93 @@ class Tracer:
                     f"Error during background service shutdown: {e}"
                 )
 
+    def trace_to_message_history(
+        self, trace: Union[Trace, TraceClient]
+    ) -> List[Dict[str, str]]:
+        """
+        Extract message history from a trace for training purposes.
+
+        This method processes trace spans to reconstruct the conversation flow,
+        extracting messages in chronological order from LLM, user, and tool spans.
+
+        Args:
+            trace: Trace or TraceClient instance to extract messages from
+
+        Returns:
+            List of message dictionaries with 'role' and 'content' keys
+
+        Raises:
+            ValueError: If no trace is provided
+        """
+        if not trace:
+            raise ValueError("No trace provided")
+
+        # Handle both Trace and TraceClient objects
+        if isinstance(trace, TraceClient):
+            spans = trace.trace_spans
+        else:
+            spans = trace.trace_spans if hasattr(trace, "trace_spans") else []
+
+        messages = []
+        first_found = False
+
+        # Process spans in chronological order
+        for span in sorted(
+            spans, key=lambda s: s.created_at if hasattr(s, "created_at") else 0
+        ):
+            # Skip spans without output (except for first LLM span which may have input messages)
+            if span.output is None and span.span_type != "llm":
+                continue
+
+            if span.span_type == "llm":
+                # For the first LLM span, extract input messages (system + user prompts)
+                if not first_found and hasattr(span, "inputs") and span.inputs:
+                    input_messages = span.inputs.get("messages", [])
+                    if input_messages:
+                        first_found = True
+                        # Add input messages (typically system and user messages)
+                        for msg in input_messages:
+                            if (
+                                isinstance(msg, dict)
+                                and "role" in msg
+                                and "content" in msg
+                            ):
+                                messages.append(
+                                    {"role": msg["role"], "content": msg["content"]}
+                                )
+
+                # Add assistant response from span output
+                if span.output is not None:
+                    messages.append({"role": "assistant", "content": str(span.output)})
+
+            elif span.span_type == "user":
+                # Add user messages
+                if span.output is not None:
+                    messages.append({"role": "user", "content": str(span.output)})
+
+            elif span.span_type == "tool":
+                # Add tool responses as user messages (common pattern in training)
+                if span.output is not None:
+                    messages.append({"role": "user", "content": str(span.output)})
+
+        return messages
+
+    def get_current_message_history(self) -> List[Dict[str, str]]:
+        """
+        Get message history from the current trace.
+
+        Returns:
+            List of message dictionaries from the current trace context
+
+        Raises:
+            ValueError: If no current trace is found
+        """
+        current_trace = self.get_current_trace()
+        if not current_trace:
+            raise ValueError("No current trace found")
+
+        return self.trace_to_message_history(current_trace)
+
 
 def _get_current_trace(
     trace_across_async_contexts: bool = Tracer.trace_across_async_contexts,
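These two additions turn a trace into chat-style training data, mapping llm spans to assistant messages and user/tool spans to user messages as shown above. A hedged usage sketch; `judgment` is a Tracer instance configured as in the earlier examples:

```python
# Inside traced code, reconstruct the conversation recorded so far.
history = judgment.get_current_message_history()
for message in history:
    print(f"{message['role']}: {message['content'][:80]}")

# A stored Trace or a TraceClient can also be converted directly:
# messages = judgment.trace_to_message_history(some_finished_trace)
```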
@@ -1724,7 +1836,7 @@ def wrap(
 ) -> Any:
     """
     Wraps an API client to add tracing capabilities.
-    Supports OpenAI, Together, Anthropic,
+    Supports OpenAI, Together, Anthropic, Google GenAI clients, and TrainableModel.
     Patches both '.create' and Anthropic's '.stream' methods using a wrapper class.
     """
     (
@@ -1849,6 +1961,39 @@ def wrap(
         setattr(client.chat.completions, "create", wrapped(original_create))
     elif isinstance(client, (groq_AsyncGroq)):
         setattr(client.chat.completions, "create", wrapped_async(original_create))
+
+    # Check for TrainableModel from judgeval.common.trainer
+    try:
+        from judgeval.common.trainer import TrainableModel
+
+        if isinstance(client, TrainableModel):
+            # Define a wrapper function that can be reapplied to new model instances
+            def wrap_model_instance(model_instance):
+                """Wrap a model instance with tracing functionality"""
+                if hasattr(model_instance, "chat") and hasattr(
+                    model_instance.chat, "completions"
+                ):
+                    if hasattr(model_instance.chat.completions, "create"):
+                        setattr(
+                            model_instance.chat.completions,
+                            "create",
+                            wrapped(model_instance.chat.completions.create),
+                        )
+                    if hasattr(model_instance.chat.completions, "acreate"):
+                        setattr(
+                            model_instance.chat.completions,
+                            "acreate",
+                            wrapped_async(model_instance.chat.completions.acreate),
+                        )
+
+            # Register the wrapper function with the TrainableModel
+            client._register_tracer_wrapper(wrap_model_instance)
+
+            # Apply wrapping to the current model
+            wrap_model_instance(client._current_model)
+    except ImportError:
+        pass  # TrainableModel not available
+
     return client
 
 
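wrap() now recognizes the TrainableModel shipped in the new judgeval.common.trainer package (see the file list) alongside the existing OpenAI, Anthropic, Together, Google GenAI, and Groq clients. A hedged sketch of the intended call pattern; TrainableModel's constructor is not shown in this diff, so its arguments below are purely illustrative:

```python
from judgeval.common.tracer import wrap  # import path assumed; wrap() is defined in the module diffed above
from judgeval.common.trainer import TrainableModel

# Hypothetical construction: the real constructor arguments may differ.
model = TrainableModel()

# After wrapping, chat.completions.create()/acreate() on the current underlying
# model are traced, and the same wrapper is re-applied whenever the trainer swaps
# in a new model instance via _register_tracer_wrapper().
model = wrap(model)
```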
@@ -1955,6 +2100,22 @@ def _get_client_config(
         return "GROQ_API_CALL", client.chat.completions.create, None, None, None
     elif isinstance(client, (groq_AsyncGroq)):
         return "GROQ_API_CALL", client.chat.completions.create, None, None, None
+
+    # Check for TrainableModel
+    try:
+        from judgeval.common.trainer import TrainableModel
+
+        if isinstance(client, TrainableModel):
+            return (
+                "FIREWORKS_TRAINABLE_MODEL_CALL",
+                client._current_model.chat.completions.create,
+                None,
+                None,
+                None,
+            )
+    except ImportError:
+        pass  # TrainableModel not available
+
     raise ValueError(f"Unsupported client type: {type(client)}")
 
 
@@ -2133,6 +2294,37 @@ def _format_output_data(
             cache_creation_input_tokens,
         )
 
+    # Check for TrainableModel
+    try:
+        from judgeval.common.trainer import TrainableModel
+
+        if isinstance(client, TrainableModel):
+            # TrainableModel uses Fireworks LLM internally, so response format should be similar to OpenAI
+            if (
+                hasattr(response, "model")
+                and hasattr(response, "usage")
+                and hasattr(response, "choices")
+            ):
+                model_name = response.model
+                prompt_tokens = response.usage.prompt_tokens if response.usage else 0
+                completion_tokens = (
+                    response.usage.completion_tokens if response.usage else 0
+                )
+                message_content = response.choices[0].message.content
+
+                # Use LiteLLM cost calculation with fireworks_ai prefix
+                # LiteLLM supports Fireworks AI models for cost calculation when prefixed with "fireworks_ai/"
+                fireworks_model_name = f"fireworks_ai/{model_name}"
+                return message_content, _create_usage(
+                    fireworks_model_name,
+                    prompt_tokens,
+                    completion_tokens,
+                    cache_read_input_tokens,
+                    cache_creation_input_tokens,
+                )
+    except ImportError:
+        pass  # TrainableModel not available
+
     judgeval_logger.warning(f"Unsupported client type: {type(client)}")
     return None, None
 
@@ -2223,13 +2415,13 @@ def get_instance_prefixed_name(instance, class_name, class_identifiers):
     """
     if class_name in class_identifiers:
         class_config = class_identifiers[class_name]
-        attr = class_config
-
-
-
-
-
-
-
-
-
+        attr = class_config.get("identifier")
+        if attr:
+            if hasattr(instance, attr) and not callable(getattr(instance, attr)):
+                instance_name = getattr(instance, attr)
+                return instance_name
+            else:
+                raise Exception(
+                    f"Attribute {attr} does not exist for {class_name}. Check your agent() decorator."
+                )
+    return None