judgeval 0.19.0__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective registries, and is provided for informational purposes only.


judgeval/__init__.py CHANGED
@@ -146,6 +146,8 @@ class JudgmentClient(metaclass=SingletonMeta):
             requirements_text = f.read()

         try:
+            if not self.api_key or not self.organization_id:
+                raise ValueError("Judgment API key and organization ID are required")
             client = JudgmentSyncClient(
                 api_key=self.api_key,
                 organization_id=self.organization_id,
@@ -168,8 +170,7 @@ class JudgmentClient(metaclass=SingletonMeta):
                 judgeval_logger.error(f"Failed to upload custom scorer: {unique_name}")
                 return False

-        except Exception as e:
-            judgeval_logger.error(f"Error uploading custom scorer: {e}")
+        except Exception:
             raise

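The new guard makes the scorer-upload path fail fast: if either credential is missing, a ValueError is raised before JudgmentSyncClient is ever constructed. A minimal sketch of the same pattern (the helper name is illustrative, not part of the package):

def _require_credentials(api_key: str | None, organization_id: str | None) -> tuple[str, str]:
    # Mirrors the check added above: empty or None credentials are rejected up front
    # instead of being passed to JudgmentSyncClient.
    if not api_key or not organization_id:
        raise ValueError("Judgment API key and organization ID are required")
    return api_key, organization_id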
judgeval/api/api_types.py CHANGED
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename: .openapi.json
-#   timestamp: 2025-10-21T01:37:42+00:00
+#   timestamp: 2025-10-25T22:30:20+00:00

 from __future__ import annotations
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -24,15 +24,6 @@ class DatasetsFetch(TypedDict):
     project_name: str


-class DatasetsTableRow(TypedDict):
-    dataset_id: str
-    name: str
-    created_at: str
-    kind: Literal["trace", "example"]
-    entries: int
-    creator: str
-
-
 class ProjectAdd(TypedDict):
     project_name: str

@@ -67,13 +58,9 @@ class SavePromptScorerRequest(TypedDict):
     description: NotRequired[Optional[str]]


-class SavePromptScorerResponse(TypedDict):
-    message: str
-    name: str
-
-
 class FetchPromptScorersRequest(TypedDict):
     names: NotRequired[Optional[List[str]]]
+    is_trace: NotRequired[Optional[bool]]


 class CustomScorerUploadPayload(TypedDict):
@@ -193,6 +180,9 @@ DatasetKind = Literal["trace", "example"]


 class PromptScorer(TypedDict):
+    id: str
+    user_id: str
+    organization_id: str
     name: str
     prompt: str
     threshold: float
@@ -202,6 +192,7 @@ class PromptScorer(TypedDict):
     created_at: NotRequired[Optional[str]]
     updated_at: NotRequired[Optional[str]]
     is_trace: NotRequired[Optional[bool]]
+    is_bucket_rubric: NotRequired[Optional[bool]]


 class PromptCommitInfo(TypedDict):
@@ -292,6 +283,7 @@ class TraceEvaluationRun(TypedDict):
     created_at: NotRequired[str]
     trace_and_span_ids: List[TraceAndSpanId]
     is_offline: NotRequired[bool]
+    is_bucket_run: NotRequired[bool]


 class DatasetInsertExamples(TypedDict):
@@ -300,6 +292,15 @@ class DatasetInsertExamples(TypedDict):
     project_name: str


+class DatasetInfo(TypedDict):
+    dataset_id: str
+    name: str
+    created_at: str
+    kind: DatasetKind
+    entries: int
+    creator: str
+
+
 class DatasetCreate(TypedDict):
     name: str
     dataset_kind: DatasetKind
@@ -308,6 +309,10 @@ class DatasetCreate(TypedDict):
     overwrite: bool


+class SavePromptScorerResponse(TypedDict):
+    scorer_response: PromptScorer
+
+
 class FetchPromptScorersResponse(TypedDict):
     scorers: List[PromptScorer]

@@ -342,6 +347,7 @@ class OtelTraceListItem(TypedDict):
     llm_cost: NotRequired[Optional[float]]
     error: NotRequired[str]
     scores: NotRequired[List[OtelSpanListItemScores]]
+    rules_invoked: NotRequired[List[str]]
     customer_id: NotRequired[Optional[str]]
     input: NotRequired[Optional[str]]
     output: NotRequired[Optional[str]]
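Two shapes change here: DatasetsTableRow is replaced by DatasetInfo (reusing the shared DatasetKind literal), and SavePromptScorerResponse now wraps a full PromptScorer instead of a bare message/name pair. A hypothetical DatasetInfo value, assuming the generated module is importable as judgeval.api.api_types:

from judgeval.api.api_types import DatasetInfo

# Illustrative values only.
info: DatasetInfo = {
    "dataset_id": "ds_123",
    "name": "regression-suite",
    "created_at": "2025-10-25T22:30:20+00:00",
    "kind": "example",
    "entries": 42,
    "creator": "someone@example.com",
}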
judgeval/cli.py CHANGED
@@ -6,6 +6,7 @@ from dotenv import load_dotenv
 from judgeval.logger import judgeval_logger
 from judgeval import JudgmentClient
 from judgeval.version import get_version
+from judgeval.exceptions import JudgmentAPIError

 load_dotenv()

@@ -56,8 +57,15 @@ def upload_scorer(
             judgeval_logger.error("Failed to upload custom scorer")
             raise typer.Exit(1)

+        judgeval_logger.info("Custom scorer uploaded successfully!")
         raise typer.Exit(0)
-    except Exception:
+    except Exception as e:
+        if isinstance(e, JudgmentAPIError) and e.status_code == 409:
+            judgeval_logger.error(
+                "Duplicate scorer detected. Use --overwrite flag to replace the existing scorer"
+            )
+            raise typer.Exit(1)
+        # Re-raise other exceptions
         raise

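The CLI now reports a successful upload and treats an HTTP 409 from the backend as a duplicate-scorer error instead of re-raising it unformatted. A rough sketch of the same handling in user code (do_upload is a stand-in callable, not a judgeval API):

from judgeval.exceptions import JudgmentAPIError

def upload_or_explain(do_upload) -> bool:
    try:
        do_upload()
        return True
    except JudgmentAPIError as e:
        if e.status_code == 409:
            # Same message the CLI now prints for a duplicate scorer.
            print("Duplicate scorer detected. Use --overwrite flag to replace the existing scorer")
            return False
        raise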
@@ -1,6 +1,6 @@
 # generated by datamodel-codegen:
 #   filename: .openapi.json
-#   timestamp: 2025-10-21T01:37:41+00:00
+#   timestamp: 2025-10-25T22:30:19+00:00

 from __future__ import annotations
 from typing import Annotated, Any, Dict, List, Optional, Union
@@ -26,20 +26,6 @@ class DatasetsFetch(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]


-class Kind(Enum):
-    trace = "trace"
-    example = "example"
-
-
-class DatasetsTableRow(BaseModel):
-    dataset_id: Annotated[str, Field(title="Dataset Id")]
-    name: Annotated[str, Field(title="Name")]
-    created_at: Annotated[str, Field(title="Created At")]
-    kind: Annotated[Kind, Field(title="Kind")]
-    entries: Annotated[int, Field(title="Entries")]
-    creator: Annotated[str, Field(title="Creator")]
-
-
 class ProjectAdd(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]

@@ -74,13 +60,9 @@ class SavePromptScorerRequest(BaseModel):
     description: Annotated[Optional[str], Field(title="Description")] = None


-class SavePromptScorerResponse(BaseModel):
-    message: Annotated[str, Field(title="Message")]
-    name: Annotated[str, Field(title="Name")]
-
-
 class FetchPromptScorersRequest(BaseModel):
     names: Annotated[Optional[List[str]], Field(title="Names")] = None
+    is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = None


 class CustomScorerUploadPayload(BaseModel):
@@ -211,6 +193,9 @@ class DatasetKind(Enum):


 class PromptScorer(BaseModel):
+    id: Annotated[str, Field(title="Id")]
+    user_id: Annotated[str, Field(title="User Id")]
+    organization_id: Annotated[str, Field(title="Organization Id")]
     name: Annotated[str, Field(title="Name")]
     prompt: Annotated[str, Field(title="Prompt")]
     threshold: Annotated[float, Field(title="Threshold")]
@@ -220,6 +205,7 @@ class PromptScorer(BaseModel):
     created_at: Annotated[Optional[AwareDatetime], Field(title="Created At")] = None
     updated_at: Annotated[Optional[AwareDatetime], Field(title="Updated At")] = None
     is_trace: Annotated[Optional[bool], Field(title="Is Trace")] = False
+    is_bucket_rubric: Annotated[Optional[bool], Field(title="Is Bucket Rubric")] = None


 class PromptCommitInfo(BaseModel):
@@ -326,6 +312,7 @@ class TraceEvaluationRun(BaseModel):
         List[TraceAndSpanId], Field(title="Trace And Span Ids")
     ]
     is_offline: Annotated[Optional[bool], Field(title="Is Offline")] = False
+    is_bucket_run: Annotated[Optional[bool], Field(title="Is Bucket Run")] = False


 class DatasetInsertExamples(BaseModel):
@@ -334,6 +321,15 @@ class DatasetInsertExamples(BaseModel):
     project_name: Annotated[str, Field(title="Project Name")]


+class DatasetInfo(BaseModel):
+    dataset_id: Annotated[str, Field(title="Dataset Id")]
+    name: Annotated[str, Field(title="Name")]
+    created_at: Annotated[str, Field(title="Created At")]
+    kind: DatasetKind
+    entries: Annotated[int, Field(title="Entries")]
+    creator: Annotated[str, Field(title="Creator")]
+
+
 class DatasetCreate(BaseModel):
     name: Annotated[str, Field(title="Name")]
     dataset_kind: DatasetKind
@@ -342,6 +338,10 @@ class DatasetCreate(BaseModel):
     overwrite: Annotated[bool, Field(title="Overwrite")]


+class SavePromptScorerResponse(BaseModel):
+    scorer_response: PromptScorer
+
+
 class FetchPromptScorersResponse(BaseModel):
     scorers: Annotated[List[PromptScorer], Field(title="Scorers")]

@@ -380,6 +380,7 @@ class OtelTraceListItem(BaseModel):
     scores: Annotated[
         Optional[List[OtelSpanListItemScores]], Field(title="Scores")
     ] = []
+    rules_invoked: Annotated[Optional[List[str]], Field(title="Rules Invoked")] = []
     customer_id: Annotated[Optional[str], Field(title="Customer Id")] = None
     input: Annotated[Optional[str], Field(title="Input")] = None
     output: Annotated[Optional[str], Field(title="Output")] = None
@@ -32,8 +32,8 @@ class Dataset:
     dataset_kind: DatasetKind = DatasetKind.example
     examples: Optional[List[Example]] = None
     traces: Optional[List[Trace]] = None
-    judgment_api_key: str = JUDGMENT_API_KEY or ""
-    organization_id: str = JUDGMENT_ORG_ID or ""
+    judgment_api_key: str | None = JUDGMENT_API_KEY
+    organization_id: str | None = JUDGMENT_ORG_ID

     @classmethod
     def get(
@@ -41,6 +41,8 @@ class Dataset:
         name: str,
         project_name: str,
     ):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
         dataset = client.datasets_pull_for_judgeval(
             {
@@ -102,6 +104,8 @@ class Dataset:
         examples: List[Example] = [],
         overwrite: bool = False,
     ):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         if not examples:
             examples = []

@@ -125,6 +129,8 @@ class Dataset:

     @classmethod
     def list(cls, project_name: str):
+        if not cls.judgment_api_key or not cls.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
         client = JudgmentSyncClient(cls.judgment_api_key, cls.organization_id)
         datasets = client.datasets_pull_all_for_judgeval({"project_name": project_name})

@@ -173,6 +179,9 @@ class Dataset:
         if not isinstance(examples, list):
             raise TypeError("examples must be a list")

+        if not self.judgment_api_key or not self.organization_id:
+            raise ValueError("Judgment API key and organization ID are required")
+
         client = JudgmentSyncClient(self.judgment_api_key, self.organization_id)
         client.datasets_insert_examples_for_judgeval(
             {
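Because the class-level credentials can now be None, each Dataset entry point raises ValueError when they are unset instead of sending empty strings to the API. A hedged usage sketch (the judgeval.dataset import path is assumed, not shown in this diff):

from judgeval.dataset import Dataset  # assumed module path for the Dataset class above

try:
    dataset = Dataset.get(name="my-dataset", project_name="my-project")
except ValueError:
    # Raised when JUDGMENT_API_KEY / JUDGMENT_ORG_ID are not configured.
    dataset = None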
judgeval/env.py CHANGED
@@ -19,17 +19,8 @@ def optional_env_var(var_name: str, default: str | None = None) -> str | None:
     return os.getenv(var_name, default)


-def required_env_var(var_name: str) -> str:
-    value = os.getenv(var_name)
-    if value is None:
-        raise EnvironmentError(
-            f"Environment variable '{var_name}' is required but not set."
-        )
-    return value
-
-
-JUDGMENT_API_KEY = required_env_var("JUDGMENT_API_KEY")
-JUDGMENT_ORG_ID = required_env_var("JUDGMENT_ORG_ID")
+JUDGMENT_API_KEY = optional_env_var("JUDGMENT_API_KEY")
+JUDGMENT_ORG_ID = optional_env_var("JUDGMENT_ORG_ID")
 JUDGMENT_API_URL = optional_env_var("JUDGMENT_API_URL", "https://api.judgmentlabs.ai")

 JUDGMENT_DEFAULT_GPT_MODEL = optional_env_var("JUDGMENT_DEFAULT_GPT_MODEL", "gpt-5")
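With required_env_var gone, importing judgeval no longer raises when the credentials are absent; both constants may simply be None, which is why the call sites elsewhere in this release add explicit checks. The guard that callers are now expected to perform looks like this:

from judgeval.env import JUDGMENT_API_KEY, JUDGMENT_ORG_ID

# Both constants are plain Optional[str] values read once at import time.
if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
    raise ValueError("Judgment API key and organization ID are required")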
@@ -112,6 +112,8 @@ def _poll_evaluation_until_complete(

     poll_count = 0
     exception_count = 0
+    if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
+        raise ValueError("Judgment API key and organization ID are required")
     api_client = JudgmentSyncClient(JUDGMENT_API_KEY, JUDGMENT_ORG_ID)
     while poll_count < max_poll_count:
         poll_count += 1
@@ -222,6 +224,8 @@ def run_eval(
     )
     t.start()
     try:
+        if not JUDGMENT_API_KEY or not JUDGMENT_ORG_ID:
+            raise ValueError("Judgment API key and organization ID are required")
         api_client = JudgmentSyncClient(JUDGMENT_API_KEY, JUDGMENT_ORG_ID)
         response = api_client.add_to_run_eval_queue_examples(
             evaluation_run.model_dump(warnings=False)  # type: ignore
@@ -19,9 +19,11 @@ def push_prompt(
     name: str,
     prompt: str,
     tags: List[str],
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> tuple[str, Optional[str], str]:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -55,9 +57,11 @@ def fetch_prompt(
     name: str,
     commit_id: Optional[str] = None,
     tag: Optional[str] = None,
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> Optional[PromptCommitInfo]:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -89,9 +93,11 @@ def tag_prompt(
     name: str,
     commit_id: str,
     tags: List[str],
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> PromptTagResponse:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -124,9 +130,11 @@ def untag_prompt(
     project_name: str,
     name: str,
     tags: List[str],
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> PromptUntagResponse:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -153,9 +161,11 @@ def untag_prompt(
 def list_prompt(
     project_name: str,
     name: str,
-    judgment_api_key: str = JUDGMENT_API_KEY,
-    organization_id: str = JUDGMENT_ORG_ID,
+    judgment_api_key: str | None = JUDGMENT_API_KEY,
+    organization_id: str | None = JUDGMENT_ORG_ID,
 ) -> PromptVersionsResponse:
+    if not judgment_api_key or not organization_id:
+        raise ValueError("Judgment API key and organization ID are required")
     client = JudgmentSyncClient(judgment_api_key, organization_id)
     try:
         project_id = _resolve_project_id(
@@ -201,8 +211,8 @@ class Prompt:
         name: str,
         prompt: str,
         tags: Optional[List[str]] = None,
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         if tags is None:
             tags = []
@@ -225,8 +235,8 @@ class Prompt:
         name: str,
         commit_id: Optional[str] = None,
         tag: Optional[str] = None,
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         if commit_id is not None and tag is not None:
             raise ValueError(
@@ -262,8 +272,8 @@ class Prompt:
         name: str,
         commit_id: str,
         tags: List[str],
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         prompt_config = tag_prompt(
             project_name, name, commit_id, tags, judgment_api_key, organization_id
@@ -276,8 +286,8 @@ class Prompt:
         project_name: str,
         name: str,
         tags: List[str],
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         prompt_config = untag_prompt(
             project_name, name, tags, judgment_api_key, organization_id
@@ -289,8 +299,8 @@ class Prompt:
         cls,
         project_name: str,
         name: str,
-        judgment_api_key: str = JUDGMENT_API_KEY,
-        organization_id: str = JUDGMENT_ORG_ID,
+        judgment_api_key: str | None = JUDGMENT_API_KEY,
+        organization_id: str | None = JUDGMENT_ORG_ID,
     ):
         prompt_configs = list_prompt(
             project_name, name, judgment_api_key, organization_id
@@ -45,7 +45,7 @@ def push_prompt_scorer(
             detail=f"Failed to save prompt scorer: {e.detail}",
             response=e.response,
         )
-    return r["name"]
+    return r["scorer_response"]["name"]


 def fetch_prompt_scorer(
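push_prompt_scorer adapts to the new SavePromptScorerResponse, which nests a full PromptScorer under scorer_response instead of returning a bare message/name pair. A small illustration of the shape change (all values are made up):

# Previously the save endpoint returned something like {"message": "...", "name": "helpfulness"}.
response = {
    "scorer_response": {
        "id": "ps_123",
        "user_id": "u_1",
        "organization_id": "org_1",
        "name": "helpfulness",
        "prompt": "Rate the response for helpfulness.",
        "threshold": 0.7,
    }
}
name = response["scorer_response"]["name"]
assert name == "helpfulness"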
@@ -106,8 +106,8 @@ class Tracer(metaclass=SingletonMeta):
         "_initialized",
     )

-    api_key: str
-    organization_id: str
+    api_key: str | None
+    organization_id: str | None
     project_name: str
     enable_monitoring: bool
     enable_evaluation: bool
@@ -124,8 +124,8 @@ class Tracer(metaclass=SingletonMeta):
         /,
         *,
         project_name: str,
-        api_key: Optional[str] = None,
-        organization_id: Optional[str] = None,
+        api_key: str | None = None,
+        organization_id: str | None = None,
         enable_monitoring: bool = JUDGMENT_ENABLE_MONITORING.lower() == "true",
         enable_evaluation: bool = JUDGMENT_ENABLE_EVALUATIONS.lower() == "true",
         resource_attributes: Optional[Dict[str, Any]] = None,
@@ -145,10 +145,14 @@ class Tracer(metaclass=SingletonMeta):
         self.enable_evaluation = enable_evaluation
         self.resource_attributes = resource_attributes

-        self.api_client = JudgmentSyncClient(
-            api_key=self.api_key,
-            organization_id=self.organization_id,
-        )
+        if self.api_key and self.organization_id:
+            self.api_client = JudgmentSyncClient(
+                api_key=self.api_key, organization_id=self.organization_id
+            )
+        else:
+            judgeval_logger.error(
+                "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
+            )

         if initialize:
             self.initialize()
@@ -162,7 +166,7 @@ class Tracer(metaclass=SingletonMeta):
         project_id = _resolve_project_id(
             self.project_name, self.api_key, self.organization_id
         )
-        if project_id:
+        if self.api_key and self.organization_id and project_id:
             self.judgment_processor = self.get_processor(
                 tracer=self,
                 project_name=self.project_name,
@@ -177,9 +181,10 @@ class Tracer(metaclass=SingletonMeta):
             provider.add_span_processor(self.judgment_processor)
             set_tracer_provider(provider)
         else:
-            judgeval_logger.error(
-                f"Failed to resolve or autocreate project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
-            )
+            if self.api_key and self.organization_id:
+                judgeval_logger.error(
+                    f"Failed to resolve or autocreate project {self.project_name}, please create it first at https://app.judgmentlabs.ai/org/{self.organization_id}/projects. Skipping Judgment export."
+                )

         self.tracer = get_tracer_provider().get_tracer(
             JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME,
@@ -198,10 +203,19 @@ class Tracer(metaclass=SingletonMeta):
     ):
         from judgeval.tracer.exporters import JudgmentSpanExporter

+        api_key = api_key or JUDGMENT_API_KEY
+        organization_id = organization_id or JUDGMENT_ORG_ID
+
+        if not api_key or not organization_id:
+            judgeval_logger.error(
+                "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
+            )
+            return None
+
         return JudgmentSpanExporter(
             endpoint=url_for("/otel/v1/traces"),
-            api_key=api_key or JUDGMENT_API_KEY,
-            organization_id=organization_id or JUDGMENT_ORG_ID,
+            api_key=api_key,
+            organization_id=organization_id,
             project_id=project_id,
         )

@@ -217,12 +231,19 @@ class Tracer(metaclass=SingletonMeta):
         resource_attributes: Optional[Dict[str, Any]] = None,
     ) -> JudgmentSpanProcessor:
         """Create a JudgmentSpanProcessor using the correct constructor."""
+        api_key = api_key or JUDGMENT_API_KEY
+        organization_id = organization_id or JUDGMENT_ORG_ID
+        if not api_key or not organization_id:
+            judgeval_logger.error(
+                "API Key or Organization ID is not set. You must set them in the environment variables to use the tracer."
+            )
+            return NoOpJudgmentSpanProcessor()
         return JudgmentSpanProcessor(
             tracer,
             project_name,
             project_id,
-            api_key or JUDGMENT_API_KEY,
-            organization_id or JUDGMENT_ORG_ID,
+            api_key,
+            organization_id,
             max_queue_size=max_queue_size,
             export_timeout_millis=export_timeout_millis,
             resource_attributes=resource_attributes,
@@ -244,6 +265,7 @@ class Tracer(metaclass=SingletonMeta):
         """Get the internal span processor of this tracer instance."""
         return self.judgment_processor

+    @dont_throw
     def set_customer_id(self, customer_id: str) -> None:
         if not customer_id:
             judgeval_logger.warning("Customer ID is empty, skipping.")
@@ -1 +1 @@
-JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "opentelemetry.instrumentation.judgeval"
+JUDGEVAL_TRACER_INSTRUMENTING_MODULE_NAME = "judgeval"
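Taken together, the tracer changes make missing credentials non-fatal: the constructor logs an error instead of building an API client, get_exporter returns None, get_processor falls back to NoOpJudgmentSpanProcessor, and the instrumenting module name is simplified to "judgeval". A hypothetical initialization, assuming Tracer is exposed from judgeval.tracer and accepts the keyword-only arguments shown above:

from judgeval.tracer import Tracer  # assumed import path

# Without JUDGMENT_API_KEY / JUDGMENT_ORG_ID set, this logs an error and traces
# with a no-op Judgment processor rather than raising at construction time.
tracer = Tracer(project_name="demo-project")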
judgeval/tracer/keys.py CHANGED
@@ -26,18 +26,19 @@ class AttributeKeys(str, Enum):

     PENDING_TRACE_EVAL = "judgment.pending_trace_eval"

+    JUDGMENT_LLM_PROVIDER = "judgment.llm.provider"
+    JUDGMENT_LLM_MODEL_NAME = "judgment.llm.model"
+    JUDGMENT_USAGE_NON_CACHED_INPUT_TOKENS = "judgment.usage.non_cached_input_tokens"
+    JUDGMENT_USAGE_CACHE_CREATION_INPUT_TOKENS = (
+        "judgment.usage.cache_creation_input_tokens"
+    )
+    JUDGMENT_USAGE_CACHE_READ_INPUT_TOKENS = "judgment.usage.cache_read_input_tokens"
+    JUDGMENT_USAGE_OUTPUT_TOKENS = "judgment.usage.output_tokens"
+    JUDGMENT_USAGE_TOTAL_COST_USD = "judgment.usage.total_cost_usd"
+
     GEN_AI_PROMPT = "gen_ai.prompt"
     GEN_AI_COMPLETION = "gen_ai.completion"
-    GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
-    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
     GEN_AI_SYSTEM = "gen_ai.system"
-    GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
-    GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
-    GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS = (
-        "gen_ai.usage.cache_creation_input_tokens"
-    )
-    GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read_input_tokens"
-
     GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
     GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
     GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
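The model and token-usage attributes move from the gen_ai.* namespace to judgment.*-prefixed keys. A sketch of recording them on an OpenTelemetry span, assuming AttributeKeys is used as a plain string enum:

from opentelemetry import trace
from judgeval.tracer.keys import AttributeKeys

tracer = trace.get_tracer("example")
with tracer.start_as_current_span("llm-call") as span:
    # New judgment.* keys replace the removed gen_ai.request.model / gen_ai.usage.* keys above.
    span.set_attribute(AttributeKeys.JUDGMENT_LLM_MODEL_NAME.value, "gpt-5")
    span.set_attribute(AttributeKeys.JUDGMENT_USAGE_OUTPUT_TOKENS.value, 128)
    span.set_attribute(AttributeKeys.JUDGMENT_USAGE_TOTAL_COST_USD.value, 0.0042)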