PyPI - judgeval - Versions diffs - 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl - Mend

judgeval 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

judgeval/__init__.py +3 -1
judgeval/common/tracer.py +352 -117
judgeval/constants.py +5 -3
judgeval/data/__init__.py +4 -0
judgeval/data/custom_example.py +18 -0
judgeval/data/datasets/dataset.py +5 -1
judgeval/data/datasets/eval_dataset_client.py +64 -5
judgeval/data/example.py +1 -0
judgeval/data/result.py +7 -6
judgeval/data/sequence.py +55 -0
judgeval/data/sequence_run.py +44 -0
judgeval/evaluation_run.py +12 -7
judgeval/integrations/langgraph.py +89 -72
judgeval/judgment_client.py +70 -68
judgeval/run_evaluation.py +87 -13
judgeval/scorers/__init__.py +2 -0
judgeval/scorers/judgeval_scorer.py +3 -0
judgeval/scorers/judgeval_scorers/__init__.py +7 -0
judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -1
judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +21 -0
judgeval/scorers/score.py +6 -5
judgeval/version_check.py +22 -0
{judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/METADATA +1 -1
{judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/RECORD +26 -22
judgeval/data/custom_api_example.py +0 -91
{judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/WHEEL +0 -0
{judgeval-0.0.30.dist-info → judgeval-0.0.32.dist-info}/licenses/LICENSE.md +0 -0

judgeval/common/tracer.py CHANGED Viewed

@@ -11,11 +11,12 @@ import time
 import uuid
 import warnings
 import contextvars
+import sys
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from datetime import datetime
 from http import HTTPStatus
-from typing import Any, Dict, Generator, List, Literal, Optional, Tuple, TypeAlias, Union, Callable, Awaitable
+from typing import Any, Dict, Generator, List, Literal, Optional, Tuple, TypeAlias, Union, Callable, Awaitable, Set
 from rich import print as rprint
 # Third-party imports
@@ -24,9 +25,10 @@ import requests
 from litellm import cost_per_token
 from pydantic import BaseModel
 from rich import print as rprint
-from openai import OpenAI
-from together import Together
-from anthropic import Anthropic
+from openai import OpenAI, AsyncOpenAI
+from together import Together, AsyncTogether
+from anthropic import Anthropic, AsyncAnthropic
+from google import genai
 # Local application/library-specific imports
 from judgeval.constants import (
@@ -37,7 +39,6 @@ from judgeval.constants import (
     RABBITMQ_QUEUE,
     JUDGMENT_TRACES_DELETE_API_URL,
     JUDGMENT_PROJECT_DELETE_API_URL,
-    JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL
 )
 from judgeval.judgment_client import JudgmentClient
 from judgeval.data import Example
@@ -51,10 +52,11 @@ import concurrent.futures
 # Define context variables for tracking the current trace and the current span within a trace
 current_trace_var = contextvars.ContextVar('current_trace', default=None)
-current_span_var = contextvars.ContextVar('current_span', default=None) # NEW: ContextVar for the active span name
+current_span_var = contextvars.ContextVar('current_span', default=None) # ContextVar for the active span name
+in_traced_function_var = contextvars.ContextVar('in_traced_function', default=False) # Track if we're in a traced function
 # Define type aliases for better code readability and maintainability
-ApiClient: TypeAlias = Union[OpenAI, Together, Anthropic]  # Supported API clients
+ApiClient: TypeAlias = Union[OpenAI, Together, Anthropic, AsyncOpenAI, AsyncAnthropic, AsyncTogether, genai.Client, genai.client.AsyncClient]  # Supported API clients
 TraceEntryType = Literal['enter', 'exit', 'output', 'input', 'evaluation']  # Valid trace entry types
 SpanType = Literal['span', 'tool', 'llm', 'evaluation', 'chain']
 @dataclass
@@ -69,11 +71,11 @@ class TraceEntry:
     - evaluation: Evaluation: (evaluation results)
     """
     type: TraceEntryType
-    function: str  # Name of the function being traced
     span_id: str # Unique ID for this specific span instance
     depth: int    # Indentation level for nested calls
-    message: str  # Human-readable description
     created_at: float # Unix timestamp when entry was created, replacing the deprecated 'timestamp' field
+    function: Optional[str] = None  # Name of the function being traced
+    message: Optional[str] = None  # Human-readable description
     duration: Optional[float] = None  # Time taken (for exit/evaluation entries)
     trace_id: str = None # ID of the trace this entry belongs to
     output: Any = None  # Function output value
@@ -229,6 +231,8 @@ class TraceManagerClient:
             raise ValueError(f"Failed to fetch traces: {response.text}")
         return response.json()
     def save_trace(self, trace_data: dict):
         """
@@ -356,6 +360,18 @@ class TraceClient:
         self.executed_tools = []
         self.executed_node_tools = []
         self._span_depths: Dict[str, int] = {} # NEW: To track depth of active spans
+    def get_current_span(self):
+        """Get the current span from the context var"""
+        return current_span_var.get()
+    def set_current_span(self, span: Any):
+        """Set the current span from the context var"""
+        return current_span_var.set(span)
+    def reset_current_span(self, token: Any):
+        """Reset the current span from the context var"""
+        return current_span_var.reset(token)
     @contextmanager
     def span(self, name: str, span_type: SpanType = "span"):
@@ -874,27 +890,14 @@ class TraceClient:
             "overwrite": overwrite,
             "parent_trace_id": self.parent_trace_id,
             "parent_name": self.parent_name
-        }
-        # Execute asynchrous evaluation in the background
-        # if not empty_save:  # Only send to RabbitMQ if the trace is not empty
-        #     # Send trace data to evaluation queue via API
-        #     try:
-        #         response = requests.post(
-        #             JUDGMENT_TRACES_ADD_TO_EVAL_QUEUE_API_URL,
-        #             json=trace_data,
-        #             headers={
-        #                 "Content-Type": "application/json",
-        #                 "Authorization": f"Bearer {self.tracer.api_key}",
-        #                 "X-Organization-Id": self.tracer.organization_id
-        #             },
-        #             verify=True
-        #         )
-        #         if response.status_code != HTTPStatus.OK:
-        #             warnings.warn(f"Failed to add trace to evaluation queue: {response.text}")
-        #     except Exception as e:
-        #         warnings.warn(f"Error sending trace to evaluation queue: {str(e)}")
+        }
+        # --- Log trace data before saving ---
+        try:
+            rprint(f"[TraceClient.save] Saving trace data for trace_id {self.trace_id}:")
+            rprint(json.dumps(trace_data, indent=2))
+        except Exception as log_e:
+            rprint(f"[TraceClient.save] Error logging trace data: {log_e}")
+        # --- End logging ---
         self.trace_manager_client.save_trace(trace_data)
         return self.trace_id, trace_data
@@ -917,7 +920,8 @@ class Tracer:
         rules: Optional[List[Rule]] = None,  # Added rules parameter
         organization_id: str = os.getenv("JUDGMENT_ORG_ID"),
         enable_monitoring: bool = os.getenv("JUDGMENT_MONITORING", "true").lower() == "true",
-        enable_evaluations: bool = os.getenv("JUDGMENT_EVALUATIONS", "true").lower() == "true"
+        enable_evaluations: bool = os.getenv("JUDGMENT_EVALUATIONS", "true").lower() == "true",
+        deep_tracing: bool = True  # NEW: Enable deep tracing by default
         ):
         if not hasattr(self, 'initialized'):
             if not api_key:
@@ -934,6 +938,7 @@ class Tracer:
             self.initialized: bool = True
             self.enable_monitoring: bool = enable_monitoring
             self.enable_evaluations: bool = enable_evaluations
+            self.deep_tracing: bool = deep_tracing  # NEW: Store deep tracing setting
         elif hasattr(self, 'project_name') and self.project_name != project_name:
             warnings.warn(
                 f"Attempting to initialize Tracer with project_name='{project_name}' but it was already initialized with "
@@ -941,7 +946,59 @@ class Tracer:
                 "To use a different project name, ensure the first Tracer initialization uses the desired project name.",
                 RuntimeWarning
             )
+    def set_current_trace(self, trace: TraceClient):
+        """
+        Set the current trace context in contextvars
+        """
+        current_trace_var.set(trace)
+    def get_current_trace(self) -> Optional[TraceClient]:
+        """
+        Get the current trace context from contextvars
+        """
+        return current_trace_var.get()
+    def _apply_deep_tracing(self, func, span_type="span"):
+        """
+        Apply deep tracing to all functions in the same module as the given function.
+        Args:
+            func: The function being traced
+            span_type: Type of span to use for traced functions
+        Returns:
+            A tuple of (module, original_functions_dict) where original_functions_dict
+            contains the original functions that were replaced with traced versions.
+        """
+        module = inspect.getmodule(func)
+        if not module:
+            return None, {}
+        # Save original functions
+        original_functions = {}
+        # Find all functions in the module
+        for name, obj in inspect.getmembers(module, inspect.isfunction):
+            # Skip already wrapped functions
+            if hasattr(obj, '_judgment_traced'):
+                continue
+            # Create a traced version of the function
+            # Always use default span type "span" for child functions
+            traced_func = _create_deep_tracing_wrapper(obj, self, "span")
+            # Mark the function as traced to avoid double wrapping
+            traced_func._judgment_traced = True
+            # Save the original function
+            original_functions[name] = obj
+            # Replace with traced version
+            setattr(module, name, traced_func)
+        return module, original_functions
     @contextmanager
     def trace(
         self,
@@ -987,14 +1044,8 @@ class Tracer:
             finally:
                 # Reset the context variable
                 current_trace_var.reset(token)
-    def get_current_trace(self) -> Optional[TraceClient]:
-        """
-        Get the current trace context from contextvars
-        """
-        return current_trace_var.get()
-    def observe(self, func=None, *, name=None, span_type: SpanType = "span", project_name: str = None, overwrite: bool = False):
+    def observe(self, func=None, *, name=None, span_type: SpanType = "span", project_name: str = None, overwrite: bool = False, deep_tracing: bool = None):
         """
         Decorator to trace function execution with detailed entry/exit information.
@@ -1004,20 +1055,37 @@ class Tracer:
             span_type: Type of span (default "span")
             project_name: Optional project name override
             overwrite: Whether to overwrite existing traces
+            deep_tracing: Whether to enable deep tracing for this function and all nested calls.
+                          If None, uses the tracer's default setting.
         """
         # If monitoring is disabled, return the function as is
         if not self.enable_monitoring:
             return func if func else lambda f: f
         if func is None:
-            return lambda f: self.observe(f, name=name, span_type=span_type, project_name=project_name, overwrite=overwrite)
+            return lambda f: self.observe(f, name=name, span_type=span_type, project_name=project_name,
+                                         overwrite=overwrite, deep_tracing=deep_tracing)
         # Use provided name or fall back to function name
         span_name = name or func.__name__
+        # Store custom attributes on the function object
+        func._judgment_span_name = span_name
+        func._judgment_span_type = span_type
+        # Use the provided deep_tracing value or fall back to the tracer's default
+        use_deep_tracing = deep_tracing if deep_tracing is not None else self.deep_tracing
         if asyncio.iscoroutinefunction(func):
             @functools.wraps(func)
             async def async_wrapper(*args, **kwargs):
+                # Check if we're already in a traced function
+                if in_traced_function_var.get():
+                    return await func(*args, **kwargs)
+                # Set in_traced_function_var to True
+                token = in_traced_function_var.set(True)
                 # Get current trace from context
                 current_trace = current_trace_var.get()
@@ -1052,9 +1120,18 @@ class Tracer:
                                 'kwargs': kwargs
                             })
+                            # If deep tracing is enabled, apply monkey patching
+                            if use_deep_tracing:
+                                module, original_functions = self._apply_deep_tracing(func, span_type)
                             # Execute function
                             result = await func(*args, **kwargs)
+                            # Restore original functions if deep tracing was enabled
+                            if use_deep_tracing and module and 'original_functions' in locals():
+                                for name, obj in original_functions.items():
+                                    setattr(module, name, obj)
                             # Record output
                             span.record_output(result)
@@ -1064,29 +1141,52 @@ class Tracer:
                     finally:
                         # Reset trace context (span context resets automatically)
                         current_trace_var.reset(trace_token)
+                        # Reset in_traced_function_var
+                        in_traced_function_var.reset(token)
                 else:
                     # Already have a trace context, just create a span in it
                     # The span method handles current_span_var
-                    with current_trace.span(span_name, span_type=span_type) as span: # MODIFIED: Use span_name directly
-                        # Record inputs
-                        span.record_input({
-                            'args': str(args),
-                            'kwargs': kwargs
-                        })
-                        # Execute function
-                        result = await func(*args, **kwargs)
-                        # Record output
-                        span.record_output(result)
+                    try:
+                        with current_trace.span(span_name, span_type=span_type) as span: # MODIFIED: Use span_name directly
+                            # Record inputs
+                            span.record_input({
+                                'args': str(args),
+                                'kwargs': kwargs
+                            })
+                            # If deep tracing is enabled, apply monkey patching
+                            if use_deep_tracing:
+                                module, original_functions = self._apply_deep_tracing(func, span_type)
+                            # Execute function
+                            result = await func(*args, **kwargs)
+                            # Restore original functions if deep tracing was enabled
+                            if use_deep_tracing and module and 'original_functions' in locals():
+                                for name, obj in original_functions.items():
+                                    setattr(module, name, obj)
+                            # Record output
+                            span.record_output(result)
                         return result
+                    finally:
+                        # Reset in_traced_function_var
+                        in_traced_function_var.reset(token)
             return async_wrapper
         else:
-            # Non-async function implementation remains unchanged
+            # Non-async function implementation with deep tracing
             @functools.wraps(func)
             def wrapper(*args, **kwargs):
+                # Check if we're already in a traced function
+                if in_traced_function_var.get():
+                    return func(*args, **kwargs)
+                # Set in_traced_function_var to True
+                token = in_traced_function_var.set(True)
                 # Get current trace from context
                 current_trace = current_trace_var.get()
@@ -1121,9 +1221,18 @@ class Tracer:
                                 'kwargs': kwargs
                             })
+                            # If deep tracing is enabled, apply monkey patching
+                            if use_deep_tracing:
+                                module, original_functions = self._apply_deep_tracing(func, span_type)
                             # Execute function
                             result = func(*args, **kwargs)
+                            # Restore original functions if deep tracing was enabled
+                            if use_deep_tracing and module and 'original_functions' in locals():
+                                for name, obj in original_functions.items():
+                                    setattr(module, name, obj)
                             # Record output
                             span.record_output(result)
@@ -1133,24 +1242,40 @@ class Tracer:
                     finally:
                         # Reset trace context (span context resets automatically)
                         current_trace_var.reset(trace_token)
+                        # Reset in_traced_function_var
+                        in_traced_function_var.reset(token)
                 else:
                     # Already have a trace context, just create a span in it
                     # The span method handles current_span_var
-                    with current_trace.span(span_name, span_type=span_type) as span: # MODIFIED: Use span_name directly
-                        # Record inputs
-                        span.record_input({
-                            'args': str(args),
-                            'kwargs': kwargs
-                        })
-                        # Execute function
-                        result = func(*args, **kwargs)
-                        # Record output
-                        span.record_output(result)
+                    try:
+                        with current_trace.span(span_name, span_type=span_type) as span: # MODIFIED: Use span_name directly
+                            # Record inputs
+                            span.record_input({
+                                'args': str(args),
+                                'kwargs': kwargs
+                            })
+                            # If deep tracing is enabled, apply monkey patching
+                            if use_deep_tracing:
+                                module, original_functions = self._apply_deep_tracing(func, span_type)
+                            # Execute function
+                            result = func(*args, **kwargs)
+                            # Restore original functions if deep tracing was enabled
+                            if use_deep_tracing and module and 'original_functions' in locals():
+                                for name, obj in original_functions.items():
+                                    setattr(module, name, obj)
+                            # Record output
+                            span.record_output(result)
                         return result
+                    finally:
+                        # Reset in_traced_function_var
+                        in_traced_function_var.reset(token)
             return wrapper
     def score(self, func=None, scorers: List[Union[APIJudgmentScorer, JudgevalScorer]] = None, model: str = None, log_results: bool = True, *, name: str = None, span_type: SpanType = "span"):
@@ -1199,34 +1324,69 @@ def wrap(client: Any) -> Any:
     """
     # Get the appropriate configuration for this client type
     span_name, original_create = _get_client_config(client)
-    def traced_create(*args, **kwargs):
-        # Get the current trace from contextvars
-        current_trace = current_trace_var.get()
-        # Skip tracing if no active trace
-        if not current_trace:
-            return original_create(*args, **kwargs)
-        with current_trace.span(span_name, span_type="llm") as span:
-            # Format and record the input parameters
-            input_data = _format_input_data(client, **kwargs)
-            span.record_input(input_data)
-            # Make the actual API call
-            response = original_create(*args, **kwargs)
+    # Handle async clients differently than synchronous clients (need an async function for async clients)
+    if (isinstance(client, (AsyncOpenAI, AsyncAnthropic, AsyncTogether, genai.client.AsyncClient))):
+        async def traced_create(*args, **kwargs):
+            # Get the current trace from contextvars
+            current_trace = current_trace_var.get()
-            # Format and record the output
-            output_data = _format_output_data(client, response)
-            span.record_output(output_data)
+            # Skip tracing if no active trace
+            if not current_trace:
+                return original_create(*args, **kwargs)
+            with current_trace.span(span_name, span_type="llm") as span:
+                # Format and record the input parameters
+                input_data = _format_input_data(client, **kwargs)
+                span.record_input(input_data)
+                # Make the actual API call
+                try:
+                    response = await original_create(*args, **kwargs)
+                except Exception as e:
+                    print(f"Error during API call: {e}")
+                    raise
+                # Format and record the output
+                output_data = _format_output_data(client, response)
+                span.record_output(output_data)
+                return response
+    else:
+        def traced_create(*args, **kwargs):
+            # Get the current trace from contextvars
+            current_trace = current_trace_var.get()
-            return response
+            # Skip tracing if no active trace
+            if not current_trace:
+                return original_create(*args, **kwargs)
+            with current_trace.span(span_name, span_type="llm") as span:
+                # Format and record the input parameters
+                input_data = _format_input_data(client, **kwargs)
+                span.record_input(input_data)
+                # Make the actual API call
+                try:
+                    response = original_create(*args, **kwargs)
+                except Exception as e:
+                    print(f"Error during API call: {e}")
+                    raise
+                # Format and record the output
+                output_data = _format_output_data(client, response)
+                span.record_output(output_data)
+                return response
     # Replace the original method with our traced version
-    if isinstance(client, (OpenAI, Together)):
+    if isinstance(client, (OpenAI, Together, AsyncOpenAI, AsyncTogether)):
         client.chat.completions.create = traced_create
-    elif isinstance(client, Anthropic):
+    elif isinstance(client, (Anthropic, AsyncAnthropic)):
         client.messages.create = traced_create
+    elif isinstance(client, (genai.Client, genai.client.AsyncClient)):
+        client.models.generate_content = traced_create
     return client
@@ -1246,12 +1406,14 @@ def _get_client_config(client: ApiClient) -> tuple[str, callable]:
     Raises:
         ValueError: If client type is not supported
     """
-    if isinstance(client, OpenAI):
+    if isinstance(client, (OpenAI, AsyncOpenAI)):
         return "OPENAI_API_CALL", client.chat.completions.create
-    elif isinstance(client, Together):
+    elif isinstance(client, (Together, AsyncTogether)):
         return "TOGETHER_API_CALL", client.chat.completions.create
-    elif isinstance(client, Anthropic):
+    elif isinstance(client, (Anthropic, AsyncAnthropic)):
         return "ANTHROPIC_API_CALL", client.messages.create
+    elif isinstance(client, (genai.Client, genai.client.AsyncClient)):
+        return "GOOGLE_API_CALL", client.models.generate_content
     raise ValueError(f"Unsupported client type: {type(client)}")
 def _format_input_data(client: ApiClient, **kwargs) -> dict:
@@ -1260,11 +1422,16 @@ def _format_input_data(client: ApiClient, **kwargs) -> dict:
     Extracts relevant parameters from kwargs based on the client type
     to ensure consistent tracing across different APIs.
     """
-    if isinstance(client, (OpenAI, Together)):
+    if isinstance(client, (OpenAI, Together, AsyncOpenAI, AsyncTogether)):
         return {
             "model": kwargs.get("model"),
             "messages": kwargs.get("messages"),
         }
+    elif isinstance(client, (genai.Client, genai.client.AsyncClient)):
+        return {
+            "model": kwargs.get("model"),
+            "contents": kwargs.get("contents")
+        }
     # Anthropic requires additional max_tokens parameter
     return {
         "model": kwargs.get("model"),
@@ -1283,7 +1450,7 @@ def _format_output_data(client: ApiClient, response: Any) -> dict:
             - content: The generated text
             - usage: Token usage statistics
     """
-    if isinstance(client, (OpenAI, Together)):
+    if isinstance(client, (OpenAI, Together, AsyncOpenAI, AsyncTogether)):
         return {
             "content": response.choices[0].message.content,
             "usage": {
@@ -1292,6 +1459,15 @@ def _format_output_data(client: ApiClient, response: Any) -> dict:
                 "total_tokens": response.usage.total_tokens
             }
         }
+    elif isinstance(client, (genai.Client, genai.client.AsyncClient)):
+        return {
+            "content": response.candidates[0].content.parts[0].text,
+            "usage": {
+                "prompt_tokens": response.usage_metadata.prompt_token_count,
+                "completion_tokens": response.usage_metadata.candidates_token_count,
+                "total_tokens": response.usage_metadata.total_token_count
+            }
+        }
     # Anthropic has a different response structure
     return {
         "content": response.content[0].text,
@@ -1302,29 +1478,88 @@ def _format_output_data(client: ApiClient, response: Any) -> dict:
         }
     }
-# Add a global context-preserving gather function
-# async def trace_gather(*coroutines, return_exceptions=False): # REMOVED
-#     """ # REMOVED
-#     A wrapper around asyncio.gather that ensures the trace context # REMOVED
-#     is available within the gathered coroutines using contextvars.copy_context. # REMOVED
-#     """ # REMOVED
-#     # Get the original asyncio.gather (if we patched it) # REMOVED
-#     original_gather = getattr(asyncio, "_original_gather", asyncio.gather) # REMOVED
-# # REMOVED
-#     # Use contextvars.copy_context() to ensure context propagation # REMOVED
-#     ctx = contextvars.copy_context() # REMOVED
-#      # REMOVED
-#     # Wrap the gather call within the copied context # REMOVED
-#     return await ctx.run(original_gather, *coroutines, return_exceptions=return_exceptions) # REMOVED
-# Store the original gather and apply the patch *once*
-# global _original_gather_stored # REMOVED
-# if not globals().get('_original_gather_stored'): # REMOVED
-#     # Check if asyncio.gather is already our wrapper to prevent double patching # REMOVED
-#     if asyncio.gather.__name__ != 'trace_gather':  # REMOVED
-#         asyncio._original_gather = asyncio.gather # REMOVED
-#         asyncio.gather = trace_gather # REMOVED
-#         _original_gather_stored = True # REMOVED
+# Add a new function for deep tracing at the module level
+def _create_deep_tracing_wrapper(func, tracer, span_type="span"):
+    """
+    Creates a wrapper for a function that automatically traces it when called within a traced function.
+    This enables deep tracing without requiring explicit @observe decorators on every function.
+    Args:
+        func: The function to wrap
+        tracer: The Tracer instance
+        span_type: Type of span (default "span")
+    Returns:
+        A wrapped function that will be traced when called
+    """
+    # Skip wrapping if the function is not callable or is a built-in
+    if not callable(func) or isinstance(func, type) or func.__module__ == 'builtins':
+        return func
+    # Get function name for the span - check for custom name set by @observe
+    func_name = getattr(func, '_judgment_span_name', func.__name__)
+    # Check for custom span_type set by @observe
+    func_span_type = getattr(func, '_judgment_span_type', "span")
+    # Store original function to prevent losing reference
+    original_func = func
+    # Create appropriate wrapper based on whether the function is async or not
+    if asyncio.iscoroutinefunction(func):
+        @functools.wraps(func)
+        async def async_deep_wrapper(*args, **kwargs):
+            # Get current trace from context
+            current_trace = current_trace_var.get()
+            # If no trace context, just call the function
+            if not current_trace:
+                return await original_func(*args, **kwargs)
+            # Create a span for this function call - use custom span_type if available
+            with current_trace.span(func_name, span_type=func_span_type) as span:
+                # Record inputs
+                span.record_input({
+                    'args': str(args),
+                    'kwargs': kwargs
+                })
+                # Execute function
+                result = await original_func(*args, **kwargs)
+                # Record output
+                span.record_output(result)
+                return result
+        return async_deep_wrapper
+    else:
+        @functools.wraps(func)
+        def deep_wrapper(*args, **kwargs):
+            # Get current trace from context
+            current_trace = current_trace_var.get()
+            # If no trace context, just call the function
+            if not current_trace:
+                return original_func(*args, **kwargs)
+            # Create a span for this function call - use custom span_type if available
+            with current_trace.span(func_name, span_type=func_span_type) as span:
+                # Record inputs
+                span.record_input({
+                    'args': str(args),
+                    'kwargs': kwargs
+                })
+                # Execute function
+                result = original_func(*args, **kwargs)
+                # Record output
+                span.record_output(result)
+                return result
+        return deep_wrapper
 # Add the new TraceThreadPoolExecutor class
 class TraceThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):

judgeval 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl

judgeval 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl