judgeval 0.15.0__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/api/__init__.py +4 -18
- judgeval/api/api_types.py +18 -2
- judgeval/data/judgment_types.py +18 -2
- judgeval/logger.py +1 -1
- judgeval/tracer/__init__.py +10 -7
- judgeval/tracer/keys.py +7 -3
- judgeval/tracer/llm/__init__.py +2 -1227
- judgeval/tracer/llm/config.py +110 -0
- judgeval/tracer/llm/constants.py +10 -0
- judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +611 -0
- judgeval/tracer/llm/llm_google/__init__.py +0 -0
- judgeval/tracer/llm/llm_google/config.py +24 -0
- judgeval/tracer/llm/llm_google/wrapper.py +426 -0
- judgeval/tracer/llm/llm_groq/__init__.py +0 -0
- judgeval/tracer/llm/llm_groq/config.py +23 -0
- judgeval/tracer/llm/llm_groq/wrapper.py +477 -0
- judgeval/tracer/llm/llm_openai/__init__.py +3 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +637 -0
- judgeval/tracer/llm/llm_together/__init__.py +0 -0
- judgeval/tracer/llm/llm_together/config.py +23 -0
- judgeval/tracer/llm/llm_together/wrapper.py +478 -0
- judgeval/tracer/llm/providers.py +5 -5
- judgeval/tracer/processors/__init__.py +1 -1
- judgeval/trainer/console.py +1 -1
- judgeval/utils/decorators/__init__.py +0 -0
- judgeval/utils/decorators/dont_throw.py +21 -0
- judgeval/utils/{decorators.py → decorators/use_once.py} +0 -11
- judgeval/utils/meta.py +1 -1
- judgeval/utils/version_check.py +1 -1
- judgeval/version.py +1 -1
- judgeval-0.16.1.dist-info/METADATA +266 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/RECORD +38 -24
- judgeval/tracer/llm/google/__init__.py +0 -21
- judgeval/tracer/llm/groq/__init__.py +0 -20
- judgeval/tracer/llm/together/__init__.py +0 -20
- judgeval-0.15.0.dist-info/METADATA +0 -158
- /judgeval/tracer/llm/{anthropic/__init__.py → llm_anthropic/config.py} +0 -0
- /judgeval/tracer/llm/{openai/__init__.py → llm_openai/config.py} +0 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/WHEEL +0 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/entry_points.txt +0 -0
- {judgeval-0.15.0.dist-info → judgeval-0.16.1.dist-info}/licenses/LICENSE.md +0 -0
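Two entries in the list above are worth calling out: judgeval/utils/decorators.py is split into a package (use_once.py keeps the old contents), and a new dont_throw decorator lands in judgeval/utils/decorators/dont_throw.py (+21 lines; its body is not included in this diff, though the tracer imports it below). A minimal sketch of what a decorator with that contract conventionally looks like; the logging call and return behavior here are assumptions, not the package's actual code:

import functools
from typing import Any, Callable, Optional, TypeVar

F = TypeVar("F", bound=Callable[..., Any])


def dont_throw(func: F) -> F:
    # Hypothetical: catch everything so instrumentation never crashes the host app.
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Optional[Any]:
        try:
            return func(*args, **kwargs)
        except Exception as exc:
            # Assumption: the real module logs via judgeval's logger instead of print.
            print(f"suppressed error in {func.__name__}: {exc}")
            return None

    return wrapper  # type: ignore[return-value]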
judgeval/api/__init__.py
CHANGED
@@ -73,7 +73,7 @@ class JudgmentSyncClient:
 
     def evaluate_examples(
         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
-    ) ->
+    ) -> EvaluateResponse:
         query_params = {}
         if stream is not None:
             query_params["stream"] = stream
@@ -86,7 +86,7 @@ class JudgmentSyncClient:
 
     def evaluate_traces(
         self, payload: TraceEvaluationRun, stream: Optional[str] = None
-    ) ->
+    ) -> EvaluateResponse:
         query_params = {}
         if stream is not None:
             query_params["stream"] = stream
@@ -212,13 +212,6 @@ class JudgmentSyncClient:
             payload,
         )
 
-    def e2e_fetch_trace_scorer_span_score(self, payload: SpanScoreRequest) -> Any:
-        return self._request(
-            "POST",
-            url_for("/e2e_fetch_trace_scorer_span_score/"),
-            payload,
-        )
-
 
 class JudgmentAsyncClient:
     __slots__ = ("api_key", "organization_id", "client")
@@ -270,7 +263,7 @@ class JudgmentAsyncClient:
 
     async def evaluate_examples(
         self, payload: ExampleEvaluationRun, stream: Optional[str] = None
-    ) ->
+    ) -> EvaluateResponse:
         query_params = {}
         if stream is not None:
             query_params["stream"] = stream
@@ -283,7 +276,7 @@ class JudgmentAsyncClient:
 
     async def evaluate_traces(
         self, payload: TraceEvaluationRun, stream: Optional[str] = None
-    ) ->
+    ) -> EvaluateResponse:
         query_params = {}
         if stream is not None:
             query_params["stream"] = stream
@@ -411,13 +404,6 @@ class JudgmentAsyncClient:
             payload,
         )
 
-    async def e2e_fetch_trace_scorer_span_score(self, payload: SpanScoreRequest) -> Any:
-        return await self._request(
-            "POST",
-            url_for("/e2e_fetch_trace_scorer_span_score/"),
-            payload,
-        )
-
 
 __all__ = [
     "JudgmentSyncClient",
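Net effect for callers of this module: evaluate_examples and evaluate_traces on both clients now advertise an EvaluateResponse return type (defined in api_types.py below), while the e2e_fetch_trace_scorer_span_score wrapper disappears from both. A hedged call-site sketch, assuming ExampleEvaluationRun lives in the generated api_types module alongside the types shown here; the payload construction is not shown in this diff and is left as a placeholder:

from judgeval.api import JudgmentSyncClient
from judgeval.api.api_types import ExampleEvaluationRun

client = JudgmentSyncClient(api_key="...", organization_id="...")
payload: ExampleEvaluationRun = ...  # placeholder; fields not shown in this diff

# The annotation lets type checkers verify these key accesses:
response = client.evaluate_examples(payload, stream=None)
print(response["status"], len(response["results"]))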
judgeval/api/api_types.py
CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-
+# timestamp: 2025-10-09T00:16:42+00:00
 
 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -94,6 +94,7 @@ class ResolveProjectNameRequest(TypedDict):
 
 class ResolveProjectNameResponse(TypedDict):
     project_id: str
+    project_created: bool
 
 
 class TraceIdRequest(TypedDict):
@@ -146,6 +147,14 @@ class ValidationError(TypedDict):
     type: str
 
 
+class UsageInfo(TypedDict):
+    total_judgees: int
+    regular_use: int
+    pay_as_you_go_use: int
+    remaining_regular: int
+    remaining_after: int
+
+
 DatasetKind = Literal["trace", "example"]
 
 
@@ -273,7 +282,6 @@ class OtelTraceListItem(TypedDict):
     trace_id: str
     created_at: str
     duration: NotRequired[Optional[int]]
-    has_notification: NotRequired[Optional[bool]]
     tags: NotRequired[Optional[List[str]]]
     experiment_run_id: NotRequired[Optional[str]]
     span_name: NotRequired[Optional[str]]
@@ -281,6 +289,8 @@ class OtelTraceListItem(TypedDict):
     error: NotRequired[str]
     scores: NotRequired[List[OtelSpanListItemScores]]
     customer_id: NotRequired[Optional[str]]
+    input: NotRequired[Optional[str]]
+    output: NotRequired[Optional[str]]
     input_preview: NotRequired[Optional[str]]
     output_preview: NotRequired[Optional[str]]
     annotation_count: NotRequired[int]
@@ -312,6 +322,12 @@ class OtelSpanDetail(TypedDict):
     scores: NotRequired[Optional[List[OtelSpanDetailScores]]]
 
 
+class EvaluateResponse(TypedDict):
+    status: str
+    results: List[ScoringResult]
+    resource_usage: NotRequired[Optional[UsageInfo]]
+
+
 class EvalResults(TypedDict):
     results: List[ScoringResult]
     run: Union[ExampleEvaluationRun, TraceEvaluationRun]
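Because EvaluateResponse and UsageInfo are plain TypedDicts, the new fields read like ordinary dict keys. A small consumer sketch that touches only fields introduced in this diff:

from typing import Optional

from judgeval.api.api_types import EvaluateResponse, UsageInfo


def summarize(response: EvaluateResponse) -> str:
    # resource_usage is NotRequired, so .get() is the safe accessor.
    usage: Optional[UsageInfo] = response.get("resource_usage")
    line = f"{response['status']}: {len(response['results'])} result(s)"
    if usage is not None:
        line += f" (remaining_regular={usage['remaining_regular']})"
    return line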
judgeval/data/judgment_types.py
CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 # filename: .openapi.json
-# timestamp: 2025-
+# timestamp: 2025-10-09T00:16:41+00:00
 
 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -101,6 +101,7 @@ class ResolveProjectNameRequest(BaseModel):
 
 class ResolveProjectNameResponse(BaseModel):
     project_id: Annotated[str, Field(title="Project Id")]
+    project_created: Annotated[bool, Field(title="Project Created")]
 
 
 class TraceIdRequest(BaseModel):
@@ -162,6 +163,14 @@ class ValidationError(BaseModel):
     type: Annotated[str, Field(title="Error Type")]
 
 
+class UsageInfo(BaseModel):
+    total_judgees: Annotated[int, Field(title="Total Judgees")]
+    regular_use: Annotated[int, Field(title="Regular Use")]
+    pay_as_you_go_use: Annotated[int, Field(title="Pay As You Go Use")]
+    remaining_regular: Annotated[int, Field(title="Remaining Regular")]
+    remaining_after: Annotated[int, Field(title="Remaining After")]
+
+
 class DatasetKind(Enum):
     trace = "trace"
     example = "example"
@@ -309,7 +318,6 @@ class OtelTraceListItem(BaseModel):
     trace_id: Annotated[str, Field(title="Trace Id")]
     created_at: Annotated[AwareDatetime, Field(title="Created At")]
     duration: Annotated[Optional[int], Field(title="Duration")] = None
-    has_notification: Annotated[Optional[bool], Field(title="Has Notification")] = None
     tags: Annotated[Optional[List[str]], Field(title="Tags")] = None
     experiment_run_id: Annotated[Optional[str], Field(title="Experiment Run Id")] = None
     span_name: Annotated[Optional[str], Field(title="Span Name")] = None
@@ -319,6 +327,8 @@ class OtelTraceListItem(BaseModel):
         Optional[List[OtelSpanListItemScores]], Field(title="Scores")
     ] = []
     customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
+    input: Annotated[Optional[str], Field(title="Input")] = None
+    output: Annotated[Optional[str], Field(title="Output")] = None
     input_preview: Annotated[Optional[str], Field(title="Input Preview")] = None
     output_preview: Annotated[Optional[str], Field(title="Output Preview")] = None
     annotation_count: Annotated[Optional[int], Field(title="Annotation Count")] = 0
@@ -358,6 +368,12 @@ class OtelSpanDetail(BaseModel):
     )
 
 
+class EvaluateResponse(BaseModel):
+    status: Annotated[str, Field(title="Status")]
+    results: Annotated[List[ScoringResult], Field(title="Results")]
+    resource_usage: Optional[UsageInfo] = None
+
+
 class EvalResults(BaseModel):
     results: Annotated[List[ScoringResult], Field(title="Results")]
     run: Annotated[Union[ExampleEvaluationRun, TraceEvaluationRun], Field(title="Run")]
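The pydantic mirror behaves the same way, with resource_usage defaulting to None. A quick round-trip sketch (assuming pydantic v2, which the Annotated/AwareDatetime style of this file suggests):

from judgeval.data.judgment_types import EvaluateResponse

resp = EvaluateResponse(status="complete", results=[])
assert resp.resource_usage is None  # default per the diff
print(resp.model_dump())  # {'status': 'complete', 'results': [], 'resource_usage': None}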
judgeval/logger.py
CHANGED
judgeval/tracer/__init__.py
CHANGED
@@ -55,7 +55,7 @@ from judgeval.tracer.managers import (
     sync_agent_context,
     async_agent_context,
 )
-from judgeval.utils.decorators import dont_throw
+from judgeval.utils.decorators.dont_throw import dont_throw
 from judgeval.utils.guards import expect_api_key, expect_organization_id
 from judgeval.utils.serialize import safe_serialize
 from judgeval.utils.meta import SingletonMeta
@@ -159,11 +159,14 @@ class Tracer(metaclass=SingletonMeta):
 
         self.judgment_processor = NoOpJudgmentSpanProcessor()
         if self.enable_monitoring:
-            project_id = Tracer._resolve_project_id(
+            project_id, project_created = Tracer._resolve_project_id(
                 self.project_name, self.api_key, self.organization_id
-            )
-
+            ) or (None, False)
             if project_id:
+                if project_created:
+                    judgeval_logger.info(
+                        f"Project {self.project_name} was autocreated successfully."
+                    )
                 self.judgment_processor = self.get_processor(
                     tracer=self,
                     project_name=self.project_name,
@@ -179,7 +182,7 @@ class Tracer(metaclass=SingletonMeta):
             set_tracer_provider(provider)
         else:
             judgeval_logger.error(
-                f"Failed to resolve project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
+                f"Failed to resolve or autocreate project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
             )
 
         self.tracer = get_tracer_provider().get_tracer(
@@ -237,14 +240,14 @@ class Tracer(metaclass=SingletonMeta):
     @staticmethod
     def _resolve_project_id(
         project_name: str, api_key: str, organization_id: str
-    ) -> str
+    ) -> Tuple[str, bool]:
         """Resolve project_id from project_name using the API."""
         client = JudgmentSyncClient(
             api_key=api_key,
             organization_id=organization_id,
         )
         response = client.projects_resolve({"project_name": project_name})
-        return response["project_id"]
+        return response["project_id"], response["project_created"]
 
     def get_current_span(self):
         return get_current_span()
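The `or (None, False)` guard is there because the resolver can evidently come back falsy instead of raising (consistent with the dont_throw import above); unpacking None directly would be a TypeError. A standalone illustration of the pattern with a stand-in resolver:

from typing import Optional, Tuple


def resolve(ok: bool) -> Optional[Tuple[str, bool]]:
    # Stand-in for Tracer._resolve_project_id: a dont_throw-style wrapper
    # would return None on failure rather than raising.
    return ("proj_123", True) if ok else None


project_id, project_created = resolve(ok=False) or (None, False)
assert (project_id, project_created) == (None, False)

project_id, project_created = resolve(ok=True) or (None, False)
assert (project_id, project_created) == ("proj_123", True)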
judgeval/tracer/keys.py
CHANGED
@@ -12,6 +12,8 @@ class AttributeKeys(str, Enum):
     JUDGMENT_OFFLINE_MODE = "judgment.offline_mode"
     JUDGMENT_UPDATE_ID = "judgment.update_id"
 
+    JUDGMENT_USAGE_METADATA = "judgment.usage.metadata"
+
     JUDGMENT_CUSTOMER_ID = "judgment.customer_id"
 
     JUDGMENT_AGENT_ID = "judgment.agent_id"
@@ -31,13 +33,15 @@ class AttributeKeys(str, Enum):
     GEN_AI_SYSTEM = "gen_ai.system"
     GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
     GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
-
+    GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
+        "gen_ai.usage.cache_creation_input_tokens"
+    )
+    GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
+
     GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
     GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
     GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
 
-    GEN_AI_USAGE_TOTAL_COST = "gen_ai.usage.total_cost_usd"
-
 
 class InternalAttributeKeys(str, Enum):
     """
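AttributeKeys subclasses str, so members (or their .value) can be passed wherever OpenTelemetry expects an attribute name. A hedged sketch recording the two new cache-token keys; the span name and token counts are illustrative only:

from opentelemetry import trace

from judgeval.tracer.keys import AttributeKeys

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("llm.call") as span:
    # .value yields the plain "gen_ai.usage.*" string for the OTel API.
    span.set_attribute(AttributeKeys.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS.value, 128)
    span.set_attribute(AttributeKeys.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS.value, 2048)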