judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. judgeval/__init__.py +173 -10
  2. judgeval/api/__init__.py +523 -0
  3. judgeval/api/api_types.py +413 -0
  4. judgeval/cli.py +112 -0
  5. judgeval/constants.py +7 -30
  6. judgeval/data/__init__.py +1 -3
  7. judgeval/data/evaluation_run.py +125 -0
  8. judgeval/data/example.py +14 -40
  9. judgeval/data/judgment_types.py +396 -146
  10. judgeval/data/result.py +11 -18
  11. judgeval/data/scorer_data.py +3 -26
  12. judgeval/data/scripts/openapi_transform.py +5 -5
  13. judgeval/data/trace.py +115 -194
  14. judgeval/dataset/__init__.py +335 -0
  15. judgeval/env.py +55 -0
  16. judgeval/evaluation/__init__.py +346 -0
  17. judgeval/exceptions.py +28 -0
  18. judgeval/integrations/langgraph/__init__.py +13 -0
  19. judgeval/integrations/openlit/__init__.py +51 -0
  20. judgeval/judges/__init__.py +2 -2
  21. judgeval/judges/litellm_judge.py +77 -16
  22. judgeval/judges/together_judge.py +88 -17
  23. judgeval/judges/utils.py +7 -20
  24. judgeval/judgment_attribute_keys.py +55 -0
  25. judgeval/{common/logger.py → logger.py} +24 -8
  26. judgeval/prompt/__init__.py +330 -0
  27. judgeval/scorers/__init__.py +11 -11
  28. judgeval/scorers/agent_scorer.py +15 -19
  29. judgeval/scorers/api_scorer.py +21 -23
  30. judgeval/scorers/base_scorer.py +54 -36
  31. judgeval/scorers/example_scorer.py +1 -3
  32. judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
  33. judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
  34. judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
  35. judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
  36. judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
  37. judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
  38. judgeval/scorers/score.py +64 -47
  39. judgeval/scorers/utils.py +2 -107
  40. judgeval/tracer/__init__.py +1111 -2
  41. judgeval/tracer/constants.py +1 -0
  42. judgeval/tracer/exporters/__init__.py +40 -0
  43. judgeval/tracer/exporters/s3.py +119 -0
  44. judgeval/tracer/exporters/store.py +59 -0
  45. judgeval/tracer/exporters/utils.py +32 -0
  46. judgeval/tracer/keys.py +63 -0
  47. judgeval/tracer/llm/__init__.py +7 -0
  48. judgeval/tracer/llm/config.py +78 -0
  49. judgeval/tracer/llm/constants.py +9 -0
  50. judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
  51. judgeval/tracer/llm/llm_anthropic/config.py +6 -0
  52. judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
  53. judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
  54. judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
  55. judgeval/tracer/llm/llm_google/__init__.py +3 -0
  56. judgeval/tracer/llm/llm_google/config.py +6 -0
  57. judgeval/tracer/llm/llm_google/generate_content.py +127 -0
  58. judgeval/tracer/llm/llm_google/wrapper.py +30 -0
  59. judgeval/tracer/llm/llm_openai/__init__.py +3 -0
  60. judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
  61. judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
  62. judgeval/tracer/llm/llm_openai/config.py +6 -0
  63. judgeval/tracer/llm/llm_openai/responses.py +506 -0
  64. judgeval/tracer/llm/llm_openai/utils.py +42 -0
  65. judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
  66. judgeval/tracer/llm/llm_together/__init__.py +3 -0
  67. judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
  68. judgeval/tracer/llm/llm_together/config.py +6 -0
  69. judgeval/tracer/llm/llm_together/wrapper.py +52 -0
  70. judgeval/tracer/llm/providers.py +19 -0
  71. judgeval/tracer/managers.py +167 -0
  72. judgeval/tracer/processors/__init__.py +220 -0
  73. judgeval/tracer/utils.py +19 -0
  74. judgeval/trainer/__init__.py +14 -0
  75. judgeval/trainer/base_trainer.py +122 -0
  76. judgeval/trainer/config.py +123 -0
  77. judgeval/trainer/console.py +144 -0
  78. judgeval/trainer/fireworks_trainer.py +392 -0
  79. judgeval/trainer/trainable_model.py +252 -0
  80. judgeval/trainer/trainer.py +70 -0
  81. judgeval/utils/async_utils.py +39 -0
  82. judgeval/utils/decorators/__init__.py +0 -0
  83. judgeval/utils/decorators/dont_throw.py +37 -0
  84. judgeval/utils/decorators/use_once.py +13 -0
  85. judgeval/utils/file_utils.py +74 -28
  86. judgeval/utils/guards.py +36 -0
  87. judgeval/utils/meta.py +27 -0
  88. judgeval/utils/project.py +15 -0
  89. judgeval/utils/serialize.py +253 -0
  90. judgeval/utils/testing.py +70 -0
  91. judgeval/utils/url.py +10 -0
  92. judgeval/{version_check.py → utils/version_check.py} +5 -3
  93. judgeval/utils/wrappers/README.md +3 -0
  94. judgeval/utils/wrappers/__init__.py +15 -0
  95. judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
  96. judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
  97. judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
  98. judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
  99. judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
  100. judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
  101. judgeval/utils/wrappers/py.typed +0 -0
  102. judgeval/utils/wrappers/utils.py +35 -0
  103. judgeval/v1/__init__.py +88 -0
  104. judgeval/v1/data/__init__.py +7 -0
  105. judgeval/v1/data/example.py +44 -0
  106. judgeval/v1/data/scorer_data.py +42 -0
  107. judgeval/v1/data/scoring_result.py +44 -0
  108. judgeval/v1/datasets/__init__.py +6 -0
  109. judgeval/v1/datasets/dataset.py +214 -0
  110. judgeval/v1/datasets/dataset_factory.py +94 -0
  111. judgeval/v1/evaluation/__init__.py +6 -0
  112. judgeval/v1/evaluation/evaluation.py +182 -0
  113. judgeval/v1/evaluation/evaluation_factory.py +17 -0
  114. judgeval/v1/instrumentation/__init__.py +6 -0
  115. judgeval/v1/instrumentation/llm/__init__.py +7 -0
  116. judgeval/v1/instrumentation/llm/config.py +78 -0
  117. judgeval/v1/instrumentation/llm/constants.py +11 -0
  118. judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
  119. judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
  120. judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
  121. judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
  122. judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
  123. judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
  124. judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
  125. judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
  126. judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
  127. judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
  128. judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
  129. judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
  130. judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
  131. judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
  132. judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
  133. judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
  134. judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
  135. judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
  136. judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
  137. judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
  138. judgeval/v1/instrumentation/llm/providers.py +19 -0
  139. judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
  140. judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
  141. judgeval/v1/integrations/langgraph/__init__.py +13 -0
  142. judgeval/v1/integrations/openlit/__init__.py +47 -0
  143. judgeval/v1/internal/api/__init__.py +525 -0
  144. judgeval/v1/internal/api/api_types.py +413 -0
  145. judgeval/v1/prompts/__init__.py +6 -0
  146. judgeval/v1/prompts/prompt.py +29 -0
  147. judgeval/v1/prompts/prompt_factory.py +189 -0
  148. judgeval/v1/py.typed +0 -0
  149. judgeval/v1/scorers/__init__.py +6 -0
  150. judgeval/v1/scorers/api_scorer.py +82 -0
  151. judgeval/v1/scorers/base_scorer.py +17 -0
  152. judgeval/v1/scorers/built_in/__init__.py +17 -0
  153. judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
  154. judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
  155. judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
  156. judgeval/v1/scorers/built_in/faithfulness.py +28 -0
  157. judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
  158. judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
  159. judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
  160. judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
  161. judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
  162. judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
  163. judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
  164. judgeval/v1/scorers/scorers_factory.py +49 -0
  165. judgeval/v1/tracer/__init__.py +7 -0
  166. judgeval/v1/tracer/base_tracer.py +520 -0
  167. judgeval/v1/tracer/exporters/__init__.py +14 -0
  168. judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
  169. judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
  170. judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
  171. judgeval/v1/tracer/exporters/span_store.py +50 -0
  172. judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
  173. judgeval/v1/tracer/processors/__init__.py +6 -0
  174. judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
  175. judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
  176. judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
  177. judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
  178. judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
  179. judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
  180. judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
  181. judgeval/v1/tracer/tracer.py +67 -0
  182. judgeval/v1/tracer/tracer_factory.py +38 -0
  183. judgeval/v1/trainers/__init__.py +5 -0
  184. judgeval/v1/trainers/base_trainer.py +62 -0
  185. judgeval/v1/trainers/config.py +123 -0
  186. judgeval/v1/trainers/console.py +144 -0
  187. judgeval/v1/trainers/fireworks_trainer.py +392 -0
  188. judgeval/v1/trainers/trainable_model.py +252 -0
  189. judgeval/v1/trainers/trainers_factory.py +37 -0
  190. judgeval/v1/utils.py +18 -0
  191. judgeval/version.py +5 -0
  192. judgeval/warnings.py +4 -0
  193. judgeval-0.23.0.dist-info/METADATA +266 -0
  194. judgeval-0.23.0.dist-info/RECORD +201 -0
  195. judgeval-0.23.0.dist-info/entry_points.txt +2 -0
  196. judgeval/clients.py +0 -34
  197. judgeval/common/__init__.py +0 -13
  198. judgeval/common/api/__init__.py +0 -3
  199. judgeval/common/api/api.py +0 -352
  200. judgeval/common/api/constants.py +0 -165
  201. judgeval/common/exceptions.py +0 -27
  202. judgeval/common/storage/__init__.py +0 -6
  203. judgeval/common/storage/s3_storage.py +0 -98
  204. judgeval/common/tracer/__init__.py +0 -31
  205. judgeval/common/tracer/constants.py +0 -22
  206. judgeval/common/tracer/core.py +0 -1916
  207. judgeval/common/tracer/otel_exporter.py +0 -108
  208. judgeval/common/tracer/otel_span_processor.py +0 -234
  209. judgeval/common/tracer/span_processor.py +0 -37
  210. judgeval/common/tracer/span_transformer.py +0 -211
  211. judgeval/common/tracer/trace_manager.py +0 -92
  212. judgeval/common/utils.py +0 -940
  213. judgeval/data/datasets/__init__.py +0 -4
  214. judgeval/data/datasets/dataset.py +0 -341
  215. judgeval/data/datasets/eval_dataset_client.py +0 -214
  216. judgeval/data/tool.py +0 -5
  217. judgeval/data/trace_run.py +0 -37
  218. judgeval/evaluation_run.py +0 -75
  219. judgeval/integrations/langgraph.py +0 -843
  220. judgeval/judges/mixture_of_judges.py +0 -286
  221. judgeval/judgment_client.py +0 -369
  222. judgeval/rules.py +0 -521
  223. judgeval/run_evaluation.py +0 -684
  224. judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
  225. judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
  226. judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
  227. judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
  228. judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
  229. judgeval/utils/alerts.py +0 -93
  230. judgeval/utils/requests.py +0 -50
  231. judgeval-0.1.0.dist-info/METADATA +0 -202
  232. judgeval-0.1.0.dist-info/RECORD +0 -73
  233. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
  234. {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,212 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Awaitable,
6
+ Callable,
7
+ Dict,
8
+ ParamSpec,
9
+ TypeVar,
10
+ )
11
+
12
+ from opentelemetry.trace import Status, StatusCode
13
+ from judgeval.judgment_attribute_keys import AttributeKeys
14
+ from judgeval.utils.serialize import safe_serialize
15
+ from judgeval.utils.wrappers import (
16
+ immutable_wrap_sync,
17
+ immutable_wrap_async,
18
+ )
19
+ from judgeval.v1.instrumentation.llm.llm_openai.utils import (
20
+ openai_tokens_converter,
21
+ set_cost_attribute,
22
+ )
23
+
24
+ if TYPE_CHECKING:
25
+ from judgeval.v1.tracer import BaseTracer
26
+ from openai import OpenAI, AsyncOpenAI
27
+ from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
28
+
29
+ P = ParamSpec("P")
30
+ T = TypeVar("T")
31
+
32
+
33
def wrap_beta_chat_completions_parse_sync(tracer: BaseTracer, client: OpenAI) -> None:
    """Monkey-patch ``client.beta.chat.completions.parse`` with a traced wrapper."""
    completions = client.beta.chat.completions
    traced = _wrap_beta_non_streaming_sync(tracer, completions.parse)
    setattr(completions, "parse", traced)
39
def _wrap_beta_non_streaming_sync(
    tracer: BaseTracer, original_func: Callable[P, ParsedChatCompletion[T]]
) -> Callable[P, ParsedChatCompletion[T]]:
    """Return ``original_func`` wrapped so each call is recorded as an LLM span.

    A span named ``OPENAI_API_CALL`` is opened before the call, annotated with
    the serialized request, response, model name, and token usage, and is
    always closed when the call finishes — successfully or not.
    """

    def _on_start(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Stash the span in ctx first so the error/finally hooks can reach it
        # even if a later attribute call raises.
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        span = ctx["span"]
        span.set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"])

    def _on_result(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
        span = ctx.get("span")
        if not span:
            return

        span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result))

        usage = result.usage
        if usage:
            input_tokens = usage.prompt_tokens or 0
            output_tokens = usage.completion_tokens or 0
            details = usage.prompt_tokens_details
            cached_input = (details.cached_tokens or 0) if details else 0

            set_cost_attribute(span, usage)

            # Normalize the raw OpenAI counts into Judgment's token model.
            # NOTE(review): the converter's cache-creation result is discarded
            # and a literal 0 is recorded below, mirroring prior behavior.
            input_tokens, output_tokens, cached_input, _cache_creation = (
                openai_tokens_converter(
                    input_tokens, output_tokens, cached_input, 0, usage.total_tokens
                )
            )

            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS, input_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, output_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cached_input
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )

        # Prefer the model name reported by the API over the requested one.
        span.set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model or ctx["model_name"]
        )

    def _on_error(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))

    def _on_close(ctx: Dict[str, Any]) -> None:
        # Always end the span, on success and on failure alike.
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_sync(
        original_func,
        pre_hook=_on_start,
        post_hook=_on_result,
        error_hook=_on_error,
        finally_hook=_on_close,
    )
123
def wrap_beta_chat_completions_parse_async(
    tracer: BaseTracer, client: AsyncOpenAI
) -> None:
    """Monkey-patch the async ``beta.chat.completions.parse`` with a traced wrapper."""
    completions = client.beta.chat.completions
    traced = _wrap_beta_non_streaming_async(tracer, completions.parse)
    setattr(completions, "parse", traced)
131
def _wrap_beta_non_streaming_async(
    tracer: BaseTracer, original_func: Callable[P, Awaitable[ParsedChatCompletion[T]]]
) -> Callable[P, Awaitable[ParsedChatCompletion[T]]]:
    """Return the async ``original_func`` wrapped so each await is traced.

    Mirrors :func:`_wrap_beta_non_streaming_sync`: opens an ``OPENAI_API_CALL``
    span before awaiting, annotates request/response/usage, and always closes
    the span when the awaited call settles.
    """

    def _on_start(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Stash the span in ctx first so the error/finally hooks can reach it
        # even if a later attribute call raises.
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        span = ctx["span"]
        span.set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"])

    def _on_result(ctx: Dict[str, Any], result: ParsedChatCompletion[T]) -> None:
        span = ctx.get("span")
        if not span:
            return

        span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result))

        usage = result.usage
        if usage:
            input_tokens = usage.prompt_tokens or 0
            output_tokens = usage.completion_tokens or 0
            details = usage.prompt_tokens_details
            cached_input = (details.cached_tokens or 0) if details else 0

            set_cost_attribute(span, usage)

            # Normalize the raw OpenAI counts into Judgment's token model.
            # NOTE(review): the converter's cache-creation result is discarded
            # and a literal 0 is recorded below, mirroring prior behavior.
            input_tokens, output_tokens, cached_input, _cache_creation = (
                openai_tokens_converter(
                    input_tokens, output_tokens, cached_input, 0, usage.total_tokens
                )
            )

            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS, input_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, output_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cached_input
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )

        # Prefer the model name reported by the API over the requested one.
        span.set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model or ctx["model_name"]
        )

    def _on_error(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))

    def _on_close(ctx: Dict[str, Any]) -> None:
        # Always end the span, on success and on failure alike.
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_async(
        original_func,
        pre_hook=_on_start,
        post_hook=_on_result,
        error_hook=_on_error,
        finally_hook=_on_close,
    )
@@ -0,0 +1,477 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Awaitable,
6
+ Callable,
7
+ Dict,
8
+ Iterator,
9
+ AsyncIterator,
10
+ Generator,
11
+ AsyncGenerator,
12
+ ParamSpec,
13
+ TypeVar,
14
+ )
15
+ from packaging import version
16
+
17
+ from opentelemetry.trace import Status, StatusCode
18
+ from judgeval.judgment_attribute_keys import AttributeKeys
19
+ from judgeval.utils.serialize import safe_serialize
20
+ from judgeval.utils.wrappers import (
21
+ immutable_wrap_async,
22
+ immutable_wrap_sync,
23
+ mutable_wrap_sync,
24
+ mutable_wrap_async,
25
+ immutable_wrap_sync_iterator,
26
+ immutable_wrap_async_iterator,
27
+ )
28
+ from judgeval.v1.instrumentation.llm.llm_openai.utils import (
29
+ openai_tokens_converter,
30
+ set_cost_attribute,
31
+ )
32
+
33
+ if TYPE_CHECKING:
34
+ from judgeval.v1.tracer import BaseTracer
35
+ from openai import OpenAI, AsyncOpenAI
36
+ from openai.types.chat import ChatCompletion, ChatCompletionChunk
37
+
38
+ P = ParamSpec("P")
39
+ T = TypeVar("T")
40
+
41
+
42
+ def _supports_stream_options() -> bool:
43
+ try:
44
+ import openai
45
+
46
+ return version.parse(openai.__version__) >= version.parse("1.26.0")
47
+ except Exception:
48
+ return False
49
+
50
+
51
def wrap_chat_completions_create_sync(tracer: BaseTracer, client: OpenAI) -> None:
    """Monkey-patch ``client.chat.completions.create`` so every call is traced.

    The original dispatcher rebuilt the full wrapper/closure chain on every
    API call (``_wrap_streaming_sync(...)(...)``); here both wrappers are
    constructed exactly once and the per-call dispatcher only selects between
    them, which is behavior-identical but avoids the per-call allocation.
    """
    original_func = client.chat.completions.create
    streaming_func = _wrap_streaming_sync(tracer, original_func)
    non_streaming_func = _wrap_non_streaming_sync(tracer, original_func)

    def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # Streaming requests need chunk-level tracing; everything else gets
        # the single-response wrapper.
        if kwargs.get("stream", False):
            return streaming_func(*args, **kwargs)
        return non_streaming_func(*args, **kwargs)

    setattr(client.chat.completions, "create", dispatcher)
62
def _wrap_non_streaming_sync(
    tracer: BaseTracer, original_func: Callable[..., ChatCompletion]
) -> Callable[..., ChatCompletion]:
    """Return a non-streaming ``create`` wrapped so each call is traced.

    Opens an ``OPENAI_API_CALL`` span before the request, annotates it with
    the serialized request, response, model name, and token usage, and always
    closes it when the call finishes — successfully or not.
    """

    def _on_start(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Stash the span in ctx first so the error/finally hooks can reach it
        # even if a later attribute call raises.
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        span = ctx["span"]
        span.set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"])

    def _on_result(ctx: Dict[str, Any], result: ChatCompletion) -> None:
        span = ctx.get("span")
        if not span:
            return

        span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result))

        usage = result.usage
        if usage:
            input_tokens = usage.prompt_tokens or 0
            output_tokens = usage.completion_tokens or 0
            details = usage.prompt_tokens_details
            cached_input = (details.cached_tokens or 0) if details else 0

            set_cost_attribute(span, usage)

            # Normalize the raw OpenAI counts into Judgment's token model.
            # NOTE(review): the converter's cache-creation result is discarded
            # and a literal 0 is recorded below, mirroring prior behavior.
            input_tokens, output_tokens, cached_input, _cache_creation = (
                openai_tokens_converter(
                    input_tokens, output_tokens, cached_input, 0, usage.total_tokens
                )
            )

            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS, input_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, output_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cached_input
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )

        # Prefer the model name reported by the API over the requested one.
        span.set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model or ctx["model_name"]
        )

    def _on_error(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))

    def _on_close(ctx: Dict[str, Any]) -> None:
        # Always end the span, on success and on failure alike.
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_sync(
        original_func,
        pre_hook=_on_start,
        post_hook=_on_result,
        error_hook=_on_error,
        finally_hook=_on_close,
    )
146
def _wrap_streaming_sync(
    tracer: BaseTracer, original_func: Callable[..., Iterator[ChatCompletionChunk]]
) -> Callable[..., Iterator[ChatCompletionChunk]]:
    """Wrap a synchronous streaming ``chat.completions.create`` call.

    Opens an ``OPENAI_API_CALL`` span before the request, then replaces the
    returned chunk iterator with a traced one that accumulates message
    content chunk by chunk and records token usage when a chunk carries it.
    The span is closed only when the iterator is exhausted or fails — not
    when ``create`` returns — because the response is still in flight until
    the consumer drains the stream.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Open the span and record the request payload and requested model.
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"].set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        ctx["span"].set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        # Running concatenation of delta content across all chunks.
        ctx["accumulated_content"] = ""

    def mutate_kwargs_hook(ctx: Dict[str, Any], kwargs: Any) -> Any:
        # Ask the API to append a usage chunk to the stream, but only when the
        # installed SDK supports the parameter and the caller did not already
        # choose their own stream_options.  The caller's kwargs dict is never
        # mutated in place — a copy is returned instead.
        if "stream_options" not in kwargs and _supports_stream_options():
            modified_kwargs = dict(kwargs)
            modified_kwargs["stream_options"] = {"include_usage": True}
            return modified_kwargs
        return kwargs

    def mutate_hook(
        ctx: Dict[str, Any], result: Iterator[ChatCompletionChunk]
    ) -> Iterator[ChatCompletionChunk]:
        # Re-yield the provider's iterator so the wrapper below can observe
        # every chunk without changing what the consumer sees.
        def traced_generator() -> Generator[ChatCompletionChunk, None, None]:
            for chunk in result:
                yield chunk

        # NOTE: the inner hooks deliberately close over the OUTER ctx (which
        # holds the span), not the iterator wrapper's own inner_ctx.
        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
            span = ctx.get("span")
            if not span:
                return

            # Accumulate assistant text from the first choice's delta.
            if chunk.choices and len(chunk.choices) > 0:
                delta = chunk.choices[0].delta
                if delta and delta.content:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + delta.content
                    )

            # Usage arrives on (typically the final) chunk when include_usage
            # was honored; record it as soon as it is seen.
            if hasattr(chunk, "usage") and chunk.usage:
                prompt_tokens = chunk.usage.prompt_tokens or 0
                completion_tokens = chunk.usage.completion_tokens or 0
                cache_read = 0
                if chunk.usage.prompt_tokens_details:
                    cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0

                set_cost_attribute(span, chunk.usage)

                # Normalize raw OpenAI counts into Judgment's token model.
                # NOTE(review): the returned cache_creation is unused and a
                # literal 0 is recorded below — confirm this is intended.
                prompt_tokens, completion_tokens, cache_read, cache_creation = (
                    openai_tokens_converter(
                        prompt_tokens,
                        completion_tokens,
                        cache_read,
                        0,
                        chunk.usage.total_tokens,
                    )
                )

                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    prompt_tokens,
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(chunk.usage),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # Stream finished cleanly: record the full accumulated completion.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)
                span.set_status(Status(StatusCode.ERROR))

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # The span lives until the stream is done (or abandoned mid-error).
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_sync_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # The create() call itself failed before a stream existed.
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))

    # No finally_hook here on purpose: span closure is owned by the iterator
    # wrapper above once the stream has been handed to the consumer.
    return mutable_wrap_sync(
        original_func,
        pre_hook=pre_hook,
        mutate_kwargs_hook=mutate_kwargs_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
265
def wrap_chat_completions_create_async(tracer: BaseTracer, client: AsyncOpenAI) -> None:
    """Monkey-patch the async ``chat.completions.create`` so every call is traced.

    The original dispatcher rebuilt the full wrapper/closure chain on every
    API call (``_wrap_streaming_async(...)(...)``); here both wrappers are
    constructed exactly once and the per-call dispatcher only selects between
    them, which is behavior-identical but avoids the per-call allocation.
    """
    original_func = client.chat.completions.create
    streaming_func = _wrap_streaming_async(tracer, original_func)
    non_streaming_func = _wrap_non_streaming_async(tracer, original_func)

    async def dispatcher(*args: Any, **kwargs: Any) -> Any:
        # Streaming requests need chunk-level tracing; everything else gets
        # the single-response wrapper.
        if kwargs.get("stream", False):
            return await streaming_func(*args, **kwargs)
        return await non_streaming_func(*args, **kwargs)

    setattr(client.chat.completions, "create", dispatcher)
276
def _wrap_non_streaming_async(
    tracer: BaseTracer, original_func: Callable[..., Awaitable[ChatCompletion]]
) -> Callable[..., Awaitable[ChatCompletion]]:
    """Return an async non-streaming ``create`` wrapped so each await is traced.

    Mirrors :func:`_wrap_non_streaming_sync`: opens an ``OPENAI_API_CALL``
    span before awaiting, annotates request/response/usage, and always closes
    the span when the awaited call settles.
    """

    def _on_start(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Stash the span in ctx first so the error/finally hooks can reach it
        # even if a later attribute call raises.
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        span = ctx["span"]
        span.set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"])

    def _on_result(ctx: Dict[str, Any], result: ChatCompletion) -> None:
        span = ctx.get("span")
        if not span:
            return

        span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, safe_serialize(result))

        usage = result.usage
        if usage:
            input_tokens = usage.prompt_tokens or 0
            output_tokens = usage.completion_tokens or 0
            details = usage.prompt_tokens_details
            cached_input = (details.cached_tokens or 0) if details else 0

            set_cost_attribute(span, usage)

            # Normalize the raw OpenAI counts into Judgment's token model.
            # NOTE(review): the converter's cache-creation result is discarded
            # and a literal 0 is recorded below, mirroring prior behavior.
            input_tokens, output_tokens, cached_input, _cache_creation = (
                openai_tokens_converter(
                    input_tokens, output_tokens, cached_input, 0, usage.total_tokens
                )
            )

            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS, input_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, output_tokens
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cached_input
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
            )
            span.set_attribute(
                AttributeKeys.JUDGMENT_USAGE_METADATA, safe_serialize(usage)
            )

        # Prefer the model name reported by the API over the requested one.
        span.set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, result.model or ctx["model_name"]
        )

    def _on_error(ctx: Dict[str, Any], error: Exception) -> None:
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))

    def _on_close(ctx: Dict[str, Any]) -> None:
        # Always end the span, on success and on failure alike.
        span = ctx.get("span")
        if span:
            span.end()

    return immutable_wrap_async(
        original_func,
        pre_hook=_on_start,
        post_hook=_on_result,
        error_hook=_on_error,
        finally_hook=_on_close,
    )
360
def _wrap_streaming_async(
    tracer: BaseTracer,
    original_func: Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]],
) -> Callable[..., Awaitable[AsyncIterator[ChatCompletionChunk]]]:
    """Wrap an asynchronous streaming ``chat.completions.create`` call.

    Async twin of :func:`_wrap_streaming_sync`: opens an ``OPENAI_API_CALL``
    span before the request, then replaces the returned async chunk iterator
    with a traced one that accumulates message content and records token
    usage when a chunk carries it.  The span is closed only when the async
    iterator is exhausted or fails, not when ``create`` resolves.
    """

    def pre_hook(ctx: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
        # Open the span and record the request payload and requested model.
        ctx["span"] = tracer.get_tracer().start_span(
            "OPENAI_API_CALL", attributes={AttributeKeys.JUDGMENT_SPAN_KIND: "llm"}
        )
        ctx["span"].set_attribute(AttributeKeys.GEN_AI_PROMPT, safe_serialize(kwargs))
        ctx["model_name"] = kwargs.get("model", "")
        ctx["span"].set_attribute(
            AttributeKeys.JUDGMENT_LLM_MODEL_NAME, ctx["model_name"]
        )
        # Running concatenation of delta content across all chunks.
        ctx["accumulated_content"] = ""

    def mutate_kwargs_hook(ctx: Dict[str, Any], kwargs: Any) -> Any:
        # Ask the API to append a usage chunk to the stream, but only when the
        # installed SDK supports the parameter and the caller did not already
        # choose their own stream_options.  The caller's kwargs dict is never
        # mutated in place — a copy is returned instead.
        if "stream_options" not in kwargs and _supports_stream_options():
            modified_kwargs = dict(kwargs)
            modified_kwargs["stream_options"] = {"include_usage": True}
            return modified_kwargs
        return kwargs

    def mutate_hook(
        ctx: Dict[str, Any], result: AsyncIterator[ChatCompletionChunk]
    ) -> AsyncIterator[ChatCompletionChunk]:
        # Re-yield the provider's async iterator so the wrapper below can
        # observe every chunk without changing what the consumer sees.
        async def traced_generator() -> AsyncGenerator[ChatCompletionChunk, None]:
            async for chunk in result:
                yield chunk

        # NOTE: the inner hooks deliberately close over the OUTER ctx (which
        # holds the span), not the iterator wrapper's own inner_ctx.
        def yield_hook(inner_ctx: Dict[str, Any], chunk: ChatCompletionChunk) -> None:
            span = ctx.get("span")
            if not span:
                return

            # Accumulate assistant text from the first choice's delta.
            if chunk.choices and len(chunk.choices) > 0:
                delta = chunk.choices[0].delta
                if delta and delta.content:
                    ctx["accumulated_content"] = (
                        ctx.get("accumulated_content", "") + delta.content
                    )

            # Usage arrives on (typically the final) chunk when include_usage
            # was honored; record it as soon as it is seen.
            if hasattr(chunk, "usage") and chunk.usage:
                prompt_tokens = chunk.usage.prompt_tokens or 0
                completion_tokens = chunk.usage.completion_tokens or 0
                cache_read = 0
                if chunk.usage.prompt_tokens_details:
                    cache_read = chunk.usage.prompt_tokens_details.cached_tokens or 0

                set_cost_attribute(span, chunk.usage)

                # Normalize raw OpenAI counts into Judgment's token model.
                # NOTE(review): the returned cache_creation is unused and a
                # literal 0 is recorded below — confirm this is intended.
                prompt_tokens, completion_tokens, cache_read, cache_creation = (
                    openai_tokens_converter(
                        prompt_tokens,
                        completion_tokens,
                        cache_read,
                        0,
                        chunk.usage.total_tokens,
                    )
                )

                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS,
                    prompt_tokens,
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS, completion_tokens
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS, cache_read
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS, 0
                )
                span.set_attribute(
                    AttributeKeys.JUDGMENT_USAGE_METADATA,
                    safe_serialize(chunk.usage),
                )

        def post_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # Stream finished cleanly: record the full accumulated completion.
            span = ctx.get("span")
            if span:
                accumulated = ctx.get("accumulated_content", "")
                span.set_attribute(AttributeKeys.GEN_AI_COMPLETION, accumulated)

        def error_hook_inner(inner_ctx: Dict[str, Any], error: Exception) -> None:
            span = ctx.get("span")
            if span:
                span.record_exception(error)
                span.set_status(Status(StatusCode.ERROR))

        def finally_hook_inner(inner_ctx: Dict[str, Any]) -> None:
            # The span lives until the stream is done (or abandoned mid-error).
            span = ctx.get("span")
            if span:
                span.end()

        wrapped_generator = immutable_wrap_async_iterator(
            traced_generator,
            yield_hook=yield_hook,
            post_hook=post_hook_inner,
            error_hook=error_hook_inner,
            finally_hook=finally_hook_inner,
        )

        return wrapped_generator()

    def error_hook(ctx: Dict[str, Any], error: Exception) -> None:
        # The create() call itself failed before a stream existed.
        span = ctx.get("span")
        if span:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR))

    # No finally_hook here on purpose: span closure is owned by the iterator
    # wrapper above once the stream has been handed to the consumer.
    return mutable_wrap_async(
        original_func,
        pre_hook=pre_hook,
        mutate_kwargs_hook=mutate_kwargs_hook,
        mutate_hook=mutate_hook,
        error_hook=error_hook,
    )
@@ -0,0 +1,6 @@
1
from __future__ import annotations
import importlib.util

# Feature flag: True when the optional ``openai`` package is importable in
# this environment.  ``find_spec`` probes the import machinery without
# actually importing (and thus initializing) the package.
HAS_OPENAI = importlib.util.find_spec("openai") is not None

__all__ = ["HAS_OPENAI"]