judgeval 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/cli.py +65 -0
- judgeval/common/api/api.py +44 -38
- judgeval/common/api/constants.py +18 -5
- judgeval/common/api/json_encoder.py +8 -9
- judgeval/common/tracer/core.py +448 -256
- judgeval/common/tracer/otel_span_processor.py +1 -1
- judgeval/common/tracer/span_processor.py +1 -1
- judgeval/common/tracer/span_transformer.py +2 -1
- judgeval/common/tracer/trace_manager.py +6 -1
- judgeval/common/trainer/__init__.py +5 -0
- judgeval/common/trainer/config.py +125 -0
- judgeval/common/trainer/console.py +151 -0
- judgeval/common/trainer/trainable_model.py +238 -0
- judgeval/common/trainer/trainer.py +301 -0
- judgeval/data/evaluation_run.py +104 -0
- judgeval/data/judgment_types.py +37 -8
- judgeval/data/trace.py +1 -0
- judgeval/data/trace_run.py +0 -2
- judgeval/integrations/langgraph.py +2 -1
- judgeval/judgment_client.py +90 -135
- judgeval/local_eval_queue.py +3 -5
- judgeval/run_evaluation.py +43 -299
- judgeval/scorers/base_scorer.py +9 -10
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +17 -3
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/METADATA +10 -47
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/RECORD +29 -22
- judgeval-0.7.0.dist-info/entry_points.txt +2 -0
- judgeval/evaluation_run.py +0 -80
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/WHEEL +0 -0
- {judgeval-0.5.0.dist-info → judgeval-0.7.0.dist-info}/licenses/LICENSE.md +0 -0
judgeval/cli.py
ADDED
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+import typer
+from pathlib import Path
+from dotenv import load_dotenv
+from judgeval.common.logger import judgeval_logger
+from judgeval.judgment_client import JudgmentClient
+
+load_dotenv()
+
+app = typer.Typer(
+    no_args_is_help=True,
+    rich_markup_mode=None,
+    rich_help_panel=None,
+    pretty_exceptions_enable=False,
+    pretty_exceptions_show_locals=False,
+    pretty_exceptions_short=False,
+)
+
+
+@app.command("upload_scorer")
+def upload_scorer(
+    scorer_file_path: str,
+    requirements_file_path: str,
+    unique_name: str = typer.Option(
+        None, help="Custom name for the scorer (auto-detected if not provided)"
+    ),
+):
+    # Validate file paths
+    if not Path(scorer_file_path).exists():
+        judgeval_logger.error(f"Scorer file not found: {scorer_file_path}")
+        raise typer.Exit(1)
+
+    if not Path(requirements_file_path).exists():
+        judgeval_logger.error(f"Requirements file not found: {requirements_file_path}")
+        raise typer.Exit(1)
+
+    try:
+        client = JudgmentClient()
+
+        result = client.upload_custom_scorer(
+            scorer_file_path=scorer_file_path,
+            requirements_file_path=requirements_file_path,
+            unique_name=unique_name,
+        )
+
+        if not result:
+            judgeval_logger.error("Failed to upload custom scorer")
+            raise typer.Exit(1)
+
+        raise typer.Exit(0)
+    except Exception:
+        raise
+
+
+@app.command()
+def version():
+    """Show version info"""
+    judgeval_logger.info("JudgEval CLI v0.0.0")
+
+
+if __name__ == "__main__":
+    app()
+
+# judgeval upload_scorer /Users/alanzhang/repo/JudgmentLabs/judgeval/src/demo/profile_match_scorer.py /Users/alanzhang/repo/JudgmentLabs/judgeval/src/demo/requirements.txt
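A minimal, hypothetical smoke test for the new CLI (none of this is in the wheel): it drives the Typer app in-process with typer.testing.CliRunner. The file names and the --unique-name value are invented, and a real invocation also needs valid Judgment credentials in the environment, since upload_scorer constructs a JudgmentClient.

from pathlib import Path
from typer.testing import CliRunner

from judgeval.cli import app

runner = CliRunner()


def smoke_test(tmp_dir: Path) -> None:
    scorer = tmp_dir / "my_scorer.py"    # placeholder scorer module
    reqs = tmp_dir / "requirements.txt"  # placeholder requirements file
    scorer.write_text("# scorer implementation goes here\n")
    reqs.write_text("pydantic\n")

    # Equivalent to: judgeval upload_scorer <scorer> <reqs> --unique-name my-scorer
    result = runner.invoke(
        app, ["upload_scorer", str(scorer), str(reqs), "--unique-name", "my-scorer"]
    )
    print(result.exit_code, result.output)

The new judgeval-0.7.0.dist-info/entry_points.txt (+2 lines) presumably registers a judgeval console script pointing at this app, which is what makes the judgeval command in the trailing comment available after install.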
judgeval/common/api/api.py
CHANGED
@@ -20,13 +20,11 @@ from judgeval.common.api.constants import (
     JUDGMENT_EVAL_DELETE_API_URL,
     JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL,
     JUDGMENT_GET_EVAL_STATUS_API_URL,
-    JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL,
-    JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL,
     JUDGMENT_SCORER_SAVE_API_URL,
     JUDGMENT_SCORER_FETCH_API_URL,
     JUDGMENT_SCORER_EXISTS_API_URL,
+    JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
     JUDGMENT_DATASETS_APPEND_TRACES_API_URL,
-    JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL,
 )
 from judgeval.common.api.constants import (
     TraceFetchPayload,
@@ -45,12 +43,11 @@ from judgeval.common.api.constants import (
     DeleteEvalRunRequestBody,
     EvalLogPayload,
     EvalStatusPayload,
-    CheckExperimentTypePayload,
-    EvalRunNameExistsPayload,
     ScorerSavePayload,
     ScorerFetchPayload,
     ScorerExistsPayload,
-    CheckExampleKeysPayload,
+    CustomScorerUploadPayload,
+    CustomScorerTemplateResponse,
 )
 from judgeval.utils.requests import requests
 from judgeval.common.api.json_encoder import json_encoder
@@ -97,14 +94,20 @@ class JudgmentApiClient:
         method: Literal["POST", "PATCH", "GET", "DELETE"],
         url: str,
         payload: Any,
+        timeout: Optional[Union[float, tuple]] = None,
     ) -> Any:
+        # Prepare request kwargs with optional timeout
+        request_kwargs = self._request_kwargs()
+        if timeout is not None:
+            request_kwargs["timeout"] = timeout
+
         if method == "GET":
             r = requests.request(
                 method,
                 url,
                 params=payload,
                 headers=self._headers(),
-                **self._request_kwargs(),
+                **request_kwargs,
             )
         else:
             r = requests.request(
@@ -112,7 +115,7 @@ class JudgmentApiClient:
                 url,
                 json=json_encoder(payload),
                 headers=self._headers(),
-                **self._request_kwargs(),
+                **request_kwargs,
             )

         try:
@@ -186,10 +189,10 @@ class JudgmentApiClient:
         payload: EvalLogPayload = {"results": results, "run": run}
         return self._do_request("POST", JUDGMENT_EVAL_LOG_API_URL, payload)

-    def fetch_evaluation_results(self, …
+    def fetch_evaluation_results(self, experiment_run_id: str, project_name: str):
         payload: EvalRunRequestBody = {
             "project_name": project_name,
-            "…
+            "experiment_run_id": experiment_run_id,
         }
         return self._do_request("POST", JUDGMENT_EVAL_FETCH_API_URL, payload)

@@ -204,43 +207,21 @@ class JudgmentApiClient:
     def add_to_evaluation_queue(self, payload: Dict[str, Any]):
         return self._do_request("POST", JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, payload)

-    def get_evaluation_status(self, …
+    def get_evaluation_status(self, experiment_run_id: str, project_name: str):
         payload: EvalStatusPayload = {
-            "…
+            "experiment_run_id": experiment_run_id,
             "project_name": project_name,
             "judgment_api_key": self.api_key,
         }
         return self._do_request("GET", JUDGMENT_GET_EVAL_STATUS_API_URL, payload)

-    def check_experiment_type(self, …
-        payload: CheckExperimentTypePayload = {
-            …
-            "project_name": project_name,
-            "judgment_api_key": self.api_key,
-            "is_trace": is_trace,
-        }
-        return self._do_request("POST", JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL, payload)
-
-    def check_eval_run_name_exists(self, eval_name: str, project_name: str):
-        payload: EvalRunNameExistsPayload = {
-            "eval_name": eval_name,
-            "project_name": project_name,
-            "judgment_api_key": self.api_key,
-        }
-        return self._do_request("POST", JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL, payload)
-
-    def check_example_keys(self, keys: List[str], eval_name: str, project_name: str):
-        payload: CheckExampleKeysPayload = {
-            "keys": keys,
-            "eval_name": eval_name,
-            "project_name": project_name,
-        }
-        return self._do_request("POST", JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL, payload)
-
-    def save_scorer(self, name: str, prompt: str, options: Optional[dict] = None):
+    def save_scorer(
+        self, name: str, prompt: str, threshold: float, options: Optional[dict] = None
+    ):
         payload: ScorerSavePayload = {
             "name": name,
             "prompt": prompt,
+            "threshold": threshold,
             "options": options,
         }
         try:
@@ -292,6 +273,31 @@ class JudgmentApiClient:
                 request=e.request,
             )

+    def upload_custom_scorer(
+        self,
+        scorer_name: str,
+        scorer_code: str,
+        requirements_text: str,
+    ) -> CustomScorerTemplateResponse:
+        """Upload custom scorer to backend"""
+        payload: CustomScorerUploadPayload = {
+            "scorer_name": scorer_name,
+            "scorer_code": scorer_code,
+            "requirements_text": requirements_text,
+        }
+
+        try:
+            # Use longer timeout for custom scorer upload (5 minutes)
+            response = self._do_request(
+                "POST",
+                JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL,
+                payload,
+                timeout=(10, 300),
+            )
+            return response
+        except JudgmentAPIException as e:
+            raise e
+
     def push_dataset(
         self,
         dataset_alias: str,
judgeval/common/api/constants.py
CHANGED
@@ -49,9 +49,9 @@ JUDGMENT_EVAL_DELETE_API_URL = (
 JUDGMENT_EVAL_DELETE_PROJECT_API_URL = f"{ROOT_API}/delete_eval_results_by_project/"
 JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL = f"{ROOT_API}/add_to_run_eval_queue/"
 JUDGMENT_GET_EVAL_STATUS_API_URL = f"{ROOT_API}/get_evaluation_status/"
-JUDGMENT_CHECK_EXPERIMENT_TYPE_API_URL = …
-JUDGMENT_EVAL_RUN_NAME_EXISTS_API_URL = …
-JUDGMENT_CHECK_EXAMPLE_KEYS_API_URL = …
+
+# Custom Scorers API
+JUDGMENT_CUSTOM_SCORER_UPLOAD_API_URL = f"{ROOT_API}/upload_scorer/"


 # Evaluation API Payloads
@@ -73,9 +73,9 @@ class EvalLogPayload(TypedDict):


 class EvalStatusPayload(TypedDict):
-    …
-    project_name: str
+    experiment_run_id: str
     judgment_api_key: str
+    project_name: str


 class CheckExperimentTypePayload(TypedDict):
@@ -162,6 +162,7 @@ JUDGMENT_SCORER_EXISTS_API_URL = f"{ROOT_API}/scorer_exists/"
 class ScorerSavePayload(TypedDict):
     name: str
     prompt: str
+    threshold: float
     options: Optional[dict]


@@ -171,3 +172,15 @@ class ScorerFetchPayload(TypedDict):

 class ScorerExistsPayload(TypedDict):
     name: str
+
+
+class CustomScorerUploadPayload(TypedDict):
+    scorer_name: str
+    scorer_code: str
+    requirements_text: str
+
+
+class CustomScorerTemplateResponse(TypedDict):
+    scorer_name: str
+    status: str
+    message: str
judgeval/common/api/json_encoder.py
CHANGED
@@ -84,7 +84,7 @@ def json_encoder(
     )

     # Sequences
-    if isinstance(obj, (list, set, frozenset, …
+    if isinstance(obj, (list, set, frozenset, tuple, deque)):
         return _dump_sequence(
             obj=obj,
         )
@@ -169,16 +169,15 @@ def _dump_other(
     obj: Any,
 ) -> Any:
     """
-    Dump an object to a …
+    Dump an object to a representation without iterating it.
+
+    Avoids calling dict(obj) which can consume iterators/generators or
+    invoke user-defined iteration protocols.
     """
     try:
-        data = dict(obj)
-    except Exception:
         return repr(obj)
-    …
-    …
-        data,
-    )
+    except Exception:
+        return str(obj)


 def iso_format(o: Union[datetime.date, datetime.time]) -> str:
@@ -218,7 +217,7 @@ ENCODERS_BY_TYPE: Dict[Type[Any], Callable[[Any], Any]] = {
     Enum: lambda o: o.value,
     frozenset: list,
     deque: list,
-    GeneratorType: …
+    GeneratorType: repr,
     Path: str,
     Pattern: lambda o: o.pattern,
     SecretBytes: str,