PyPI - eval-studio-client - Versions diffs - 0.8.0a2__py3-none-any.whl → 0.8.2__py3-none-any.whl - Mend

eval-studio-client 0.8.0a2__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (573) hide show

eval_studio_client/insights.py CHANGED Viewed

@@ -22,7 +22,7 @@ class Insight:
     _evaluator_id: Optional[str] = None
     @staticmethod
-    def _from_api_insight(api_insight: models.V1alphaInsight) -> "Insight":
+    def _from_api_insight(api_insight: models.V1Insight) -> "Insight":
         """Converts an API Insight to a client Insight."""
         return Insight(
             description=api_insight.description or "",

eval_studio_client/leaderboards.py CHANGED Viewed

@@ -36,7 +36,7 @@ class Leaderboard:
     _report: Optional[str] = None
     _leaderboard: Optional[str] = None
     _model_name: Optional[str] = None
-    _status: Optional[models.V1alphaLeaderboardStatus] = None
+    _status: Optional[models.V1LeaderboardStatus] = None
     _client: Optional[api.ApiClient] = None
     def __post_init__(self):
@@ -60,16 +60,14 @@ class Leaderboard:
     def finished(self) -> bool:
         """Indicates whether the leaderboard has finished."""
         return self._status in [
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_FAILED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_FAILED,
         ]
     @property
     def successful(self) -> bool:
         """Indicates whether the leaderboard has finished successfully."""
-        return (
-            self._status == models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_COMPLETED
-        )
+        return self._status == models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED
     @property
     def test_suite(self) -> List[tests.Test]:
@@ -154,9 +152,9 @@ class Leaderboard:
         raise TimeoutError("Waiting timeout has been reached.")
-    def to_api_proto(self) -> models.V1alphaLeaderboard:
+    def to_api_proto(self) -> models.V1Leaderboard:
         """Converts the client Leaderboard to an API Leaderboard."""
-        return models.V1alphaLeaderboard(
+        return models.V1Leaderboard(
             display_name=self.name,
             description=self.description,
             llm_models=self.base_models or None,
@@ -165,7 +163,7 @@ class Leaderboard:
             model=self._model_name,
         )
-    def _update_result(self, api_leaderboard: models.V1alphaLeaderboard):
+    def _update_result(self, api_leaderboard: models.V1Leaderboard):
         """Refresh the leaderboard with the latest API data."""
         self.key = api_leaderboard.name or ""
         self.update_time = api_leaderboard.update_time
@@ -175,7 +173,7 @@ class Leaderboard:
     @staticmethod
     def _from_api_leaderboard(
-        api_leaderboard: models.V1alphaLeaderboard, client: Optional[api.ApiClient]
+        api_leaderboard: models.V1Leaderboard, client: Optional[api.ApiClient]
     ) -> "Leaderboard":
         """Converts an API Leaderboard to a client Leaderboard."""
         api_problems = api_leaderboard.leaderboard_problems or []
@@ -200,8 +198,8 @@ class Leaderboard:
         )
     @staticmethod
-    def _is_finished_leaderboard(leaderboard: models.V1alphaLeaderboard) -> bool:
+    def _is_finished_leaderboard(leaderboard: models.V1Leaderboard) -> bool:
         return leaderboard.status in [
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
-            models.V1alphaLeaderboardStatus.LEADERBOARD_STATUS_FAILED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
+            models.V1LeaderboardStatus.LEADERBOARD_STATUS_FAILED,
         ]

eval_studio_client/models.py CHANGED Viewed

@@ -15,6 +15,12 @@ from eval_studio_client.api import models
 # Key for Azure environment ID parameter within Model parameters.
 _AZURE_ENV_ID_PARAM = "environment_id"
+# Resource name of the default RAG model.
+DEFAULT_RAG_MODEL_KEY = "models/defaultRAGModel"
+# Resource name of the default LLM model.
+DEFAULT_LLM_MODEL_KEY = "models/defaultLLMModel"
 @dataclasses.dataclass
 class Model:
@@ -55,7 +61,7 @@ class Model:
         while True:
             res = self._leaderboard_api.leaderboard_service_list_leaderboards(
                 filter=f'model="{self.key}"',
-                view=models.V1alphaLeaderboardView.LEADERBOARD_VIEW_BASIC_WITH_TABLE,
+                view=models.V1LeaderboardView.LEADERBOARD_VIEW_BASIC_WITH_TABLE,
                 page_token=page_token,
             )
             if not res or not res.leaderboards:
@@ -138,7 +144,7 @@ class Model:
             [test_suites] if isinstance(test_suites, tests.Test) else test_suites
         )
-        create_lb_reqs: List[models.V1alphaCreateLeaderboardRequest] = []
+        create_lb_reqs: List[models.V1CreateLeaderboardRequest] = []
         for evaluator in _evaluators:
             lb = l10s.Leaderboard(
                 key="",
@@ -150,13 +156,13 @@ class Model:
                 _test_names=[t.key for t in _test_suites],
                 _client=self._client,
             )
-            create_lb_req = models.V1alphaCreateLeaderboardRequest(
+            create_lb_req = models.V1CreateLeaderboardRequest(
                 leaderboard=lb.to_api_proto()
             )
             create_lb_reqs.append(create_lb_req)
         res = self._leaderboard_api.leaderboard_service_batch_create_leaderboards(
-            models.V1alphaBatchCreateLeaderboardsRequest(
+            models.V1BatchCreateLeaderboardsRequest(
                 requests=create_lb_reqs,
                 dashboard_display_name=name,
                 dashboard_description=description,
@@ -184,7 +190,7 @@ class Model:
             test_lab: The test lab in JSON format to use for the evaluation.
             description (optional): The description of the leaderboard.
         """
-        req = models.V1alphaImportLeaderboardRequest(
+        req = models.V1ImportLeaderboardRequest(
             testLabJson=test_lab,
             evaluator=evaluator.key,
             model=self.key,
@@ -212,7 +218,7 @@ class Model:
         raise RuntimeError("Failed to list base models")
     def _get_leaderboard_from_operation(
-        self, operation: models.V1alphaOperation
+        self, operation: models.V1Operation
     ) -> Optional[l10s.Leaderboard]:
         """Retrieves the leaderboard from the operation, which created it.
@@ -230,7 +236,7 @@ class Model:
         return None
     def _get_dashboard_from_operation(
-        self, operation: models.V1alphaOperation
+        self, operation: models.V1Operation
     ) -> Optional[d8s.Dashboard]:
         """Retrieves the dashboard from the operation, which created it.
@@ -251,9 +257,7 @@ class Model:
         return None
     @staticmethod
-    def _from_api_model(
-        api_model: models.V1alphaModel, client: api.ApiClient
-    ) -> "Model":
+    def _from_api_model(api_model: models.V1Model, client: api.ApiClient) -> "Model":
         """Converts the API model to the client model."""
         return Model(
             key=api_model.name or "",
@@ -268,10 +272,10 @@ class Model:
         )
     @staticmethod
-    def _is_rag_model(api_model: models.V1alphaModel) -> bool:
+    def _is_rag_model(api_model: models.V1Model) -> bool:
         return api_model.type in [
-            models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_RAG,
-            models.V1alphaModelType.MODEL_TYPE_OPENAI_RAG,
+            models.V1ModelType.MODEL_TYPE_H2_OGPTE_RAG,
+            models.V1ModelType.MODEL_TYPE_OPENAI_RAG,
         ]
@@ -285,6 +289,12 @@ class _Models:
         Args:
             key: The model resource name to retrieve.
+        Returns:
+            Model: The model object.
+        Raises:
+            KeyError: If the model is not found.
         """
         res = self._api.model_service_get_model(key)
         if res and res.model:
@@ -292,6 +302,28 @@ class _Models:
         raise KeyError("Model not found.")
+    def get_default_rag(self) -> Model:
+        """Gets the default RAG model from Eval Studio.
+
+        Looks up the fixed resource name `DEFAULT_RAG_MODEL_KEY` via `get`.
+
+        Returns:
+            Model: The default RAG model object.
+
+        Raises:
+            KeyError: If no default RAG model is set.
+        """
+        return self.get(DEFAULT_RAG_MODEL_KEY)
+    def get_default_llm(self) -> Model:
+        """Gets the default LLM model from Eval Studio.
+
+        Looks up the fixed resource name `DEFAULT_LLM_MODEL_KEY` via `get`.
+
+        Returns:
+            Model: The default LLM model object.
+
+        Raises:
+            KeyError: If no default LLM model is set.
+        """
+        return self.get(DEFAULT_LLM_MODEL_KEY)
     def create_h2ogpte_model(
         self, name: str, is_rag: bool, description: str, url: str, api_key: str
     ) -> Model:
@@ -308,11 +340,11 @@ class _Models:
             api_key: API key for the model host system.
         """
         model_type = (
-            models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_RAG
+            models.V1ModelType.MODEL_TYPE_H2_OGPTE_RAG
             if is_rag
-            else models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_LLM
+            else models.V1ModelType.MODEL_TYPE_H2_OGPTE_LLM
         )
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
@@ -336,12 +368,12 @@ class _Models:
             url: URL of the model host system.
             api_key: API key for the model host system.
         """
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_H2_OGPT_LLM,
+            type=models.V1ModelType.MODEL_TYPE_H2_OGPT_LLM,
         )
         res = self._api.model_service_create_model(req)
         if res and res.model:
@@ -360,12 +392,12 @@ class _Models:
             url: URL of the model host system.
             api_key: API key for the model host system.
         """
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_H2_OLLMOPS,
+            type=models.V1ModelType.MODEL_TYPE_H2_OLLMOPS,
         )
         res = self._api.model_service_create_model(req)
         if res and res.model:
@@ -399,11 +431,11 @@ class _Models:
             )
         model_type = (
-            models.V1alphaModelType.MODEL_TYPE_OPENAI_RAG
+            models.V1ModelType.MODEL_TYPE_OPENAI_RAG
             if is_rag
-            else models.V1alphaModelType.MODEL_TYPE_OPENAI_CHAT
+            else models.V1ModelType.MODEL_TYPE_OPENAI_CHAT
         )
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             api_key=api_key,
@@ -429,12 +461,12 @@ class _Models:
             environmentID: Azure environment ID.
         """
         params = {_AZURE_ENV_ID_PARAM: environmentID}
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_AZURE_OPENAI_CHAT,
+            type=models.V1ModelType.MODEL_TYPE_AZURE_OPENAI_CHAT,
             parameters=json.dumps(params),
         )
         res = self._api.model_service_create_model(req)
@@ -454,12 +486,12 @@ class _Models:
             url: URL of the model host system.
             api_key: API key for the model host system.
         """
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
             url=url,
             api_key=api_key,
-            type=models.V1alphaModelType.MODEL_TYPE_OLLAMA,
+            type=models.V1ModelType.MODEL_TYPE_OLLAMA,
         )
         res = self._api.model_service_create_model(req)
         if res and res.model:
@@ -491,10 +523,10 @@ class _Models:
             "aws_secret_access_key": aws_secret_access_key,
             "aws_session_token": aws_session_token,
         }
-        req = models.V1alphaModel(
+        req = models.V1Model(
             display_name=name,
             description=description,
-            type=models.V1alphaModelType.MODEL_TYPE_AMAZON_BEDROCK,
+            type=models.V1ModelType.MODEL_TYPE_AMAZON_BEDROCK,
             api_key=json.dumps(credentials),
             parameters=json.dumps({"region": aws_region}),
         )

eval_studio_client/perturbators.py CHANGED Viewed

@@ -17,12 +17,12 @@ class PerturbatorIntensity(enum.Enum):
     medium = "medium"
     high = "high"
-    def to_api_proto(self) -> models.V1alphaPerturbatorIntensity:
+    def to_api_proto(self) -> models.V1PerturbatorIntensity:
         """Converts the client PerturbatorIntensity to an API PerturbatorIntensity."""
         proto_values = {
-            PerturbatorIntensity.low: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_LOW,
-            PerturbatorIntensity.medium: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_MEDIUM,
-            PerturbatorIntensity.high: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_HIGH,
+            PerturbatorIntensity.low: models.V1PerturbatorIntensity.PERTURBATOR_INTENSITY_LOW,
+            PerturbatorIntensity.medium: models.V1PerturbatorIntensity.PERTURBATOR_INTENSITY_MEDIUM,
+            PerturbatorIntensity.high: models.V1PerturbatorIntensity.PERTURBATOR_INTENSITY_HIGH,
         }
         return proto_values[self]
@@ -52,9 +52,7 @@ class Perturbator:
         self.intensity = self.intensity or PerturbatorIntensity.medium
     @staticmethod
-    def _from_api_perturbator(
-        api_perturbator: models.V1alphaPerturbator,
-    ) -> "Perturbator":
+    def _from_api_perturbator(api_perturbator: models.V1Perturbator) -> "Perturbator":
         """Converts an API Perturbator to a client Perturbator."""
         return Perturbator(
             key=api_perturbator.name or "",

eval_studio_client/problems.py CHANGED Viewed

@@ -32,7 +32,7 @@ class Problem:
     _evaluator_id: Optional[str] = None
     @staticmethod
-    def _from_api_problem(api_problem: models.V1alphaProblemAndAction) -> "Problem":
+    def _from_api_problem(api_problem: models.V1ProblemAndAction) -> "Problem":
         """Converts an API Problem to a client Problem."""
         try:
             severity = ProblemSeverity(api_problem.severity)

eval_studio_client/test_labs.py CHANGED Viewed

@@ -98,7 +98,7 @@ class TestLab:
         Args:
             evaluator: The evaluator to use for the evaluation.
         """
-        req = apiModels.V1alphaImportLeaderboardRequest(
+        req = apiModels.V1ImportLeaderboardRequest(
             testLabJson=self.json(),
             evaluator=evaluator.key,
             model=None,
@@ -132,7 +132,7 @@ class TestLab:
         return json.dumps(lab, indent=4, sort_keys=True)
     def _get_leaderboard_from_operation(
-        self, operation: apiModels.V1alphaOperation
+        self, operation: apiModels.V1Operation
     ) -> Optional[l10s.Leaderboard]:
         """Retrieves the leaderboard from the operation, which created it.

eval_studio_client/tests.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import dataclasses
 import datetime
+import enum
 import json
+import time
+from typing import Any
 from typing import List
 from typing import Optional
 from typing import Union
@@ -11,6 +14,121 @@ from eval_studio_client import perturbators as p10s
 from eval_studio_client.api import models
+class TestCaseGenerator(enum.Enum):
+    """Methods used for test case generation.
+
+    Each member's value is the matching `models.V1TestCasesGenerator` member,
+    so the enum definition itself is the single client-to-API mapping table.
+    """
+
+    unspecified = models.V1TestCasesGenerator.TEST_CASES_GENERATOR_UNSPECIFIED
+    simple_factual_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_SIMPLE_FACTUAL_QUESTIONS
+    )
+    multi_hop_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_MULTI_HOP_QUESTIONS
+    )
+    inference_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_INFERENCE_QUESTIONS
+    )
+    numerical_reasoning_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_NUMERICAL_REASONING_QUESTIONS
+    )
+    ambiguity_handling_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_AMBIGUITY_HANDLING_QUESTIONS
+    )
+    negation_and_contradiction_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_NEGATION_AND_CONTRADICTION_QUESTIONS
+    )
+    temporal_reasoning_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_TEMPORAL_REASONING_QUESTIONS
+    )
+    out_of_scope_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_OUT_OF_SCOPE_QUESTIONS
+    )
+    yes_or_no_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_YES_OR_NO_QUESTIONS
+    )
+    multiple_choice_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_MULTIPLE_CHOICE_QUESTIONS
+    )
+    demographic_representation_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_DEMOGRAPHIC_REPRESENTATION_QUESTIONS
+    )
+    sentiment_variation_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_SENTIMENT_VARIATION_QUESTIONS
+    )
+    irrelevant_information_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_IRRELEVANT_INFORMATION_QUESTIONS
+    )
+    deliberately_misleading_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_DELIBERATELY_MISLEADING_QUESTIONS
+    )
+
+    def to_api_proto(self) -> models.V1TestCasesGenerator:
+        """Converts the client TestCaseGenerator to an API V1TestCasesGenerator.
+
+        Returns:
+            The `models.V1TestCasesGenerator` member this client member was
+            defined with.
+        """
+        # The member value already is the API enum member; returning it avoids
+        # a second lookup table that would have to be kept in sync by hand.
+        return self.value
+@dataclasses.dataclass
+class TestCaseGenerationHandle:
+    """Client-side snapshot of the operation that generates test cases.
+
+    Instances are built by `_from_operation` from an API response carrying an
+    operation. A handle with `name=None` means the response had no operation.
+    """
+
+    # Name of the underlying operation; None when the response had none.
+    name: Optional[Any]
+    create_time: Optional[datetime.datetime] = None
+    creator: Optional[str] = None
+    update_time: Optional[datetime.datetime] = None
+    updater: Optional[str] = None
+    delete_time: Optional[datetime.datetime] = None
+    deleter: Optional[str] = None
+    # Read from the operation metadata key "progress".
+    progress: Optional[float] = None
+    # Read from the operation metadata key "progressMessage".
+    progress_message: Optional[str] = None
+    error: Optional[models.RpcStatus] = None
+    done: Optional[bool] = None
+
+    @staticmethod
+    def _from_operation(
+        res: Union[models.V1GenerateTestCasesResponse, models.V1GetOperationResponse],
+    ) -> "TestCaseGenerationHandle":
+        """Converts an API operation response to a test case generation handle.
+
+        Args:
+            res: Response whose `operation` attribute holds the operation.
+
+        Returns:
+            A handle copying the operation fields, or a handle with
+            `name=None` when the response carries no operation.
+        """
+        # Union/Optional are used instead of `X | Y` because signature and
+        # dataclass-field annotations are evaluated at import time, and the
+        # rest of this module already uses the typing spellings.
+        op: Optional[models.V1Operation] = res.operation
+        if not op:
+            return TestCaseGenerationHandle(name=None)
+
+        # Progress is carried in the operation metadata, which may be missing.
+        metadata = getattr(op, "metadata", None)
+        meta_dict = (metadata.to_dict() or {}) if metadata else {}
+
+        return TestCaseGenerationHandle(
+            name=op.name,
+            create_time=op.create_time,
+            creator=op.creator,
+            update_time=op.update_time,
+            updater=op.updater,
+            delete_time=op.delete_time,
+            deleter=op.deleter,
+            progress=meta_dict.get("progress"),
+            progress_message=meta_dict.get("progressMessage"),
+            error=op.error,
+            done=op.done,
+        )
 @dataclasses.dataclass
 class TestCase:
     """Represents a single test case, which contains tested prompt, expected answer
@@ -35,9 +153,9 @@ class TestCase:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
-    def to_api_proto(self) -> models.V1alphaTestCase:
+    def to_api_proto(self) -> models.V1TestCase:
         """Converts the client TestCase to an API TestCase."""
-        return models.V1alphaTestCase(
+        return models.V1TestCase(
             prompt=self.prompt,
             answer=self.answer,
             constraints=self.constraints,
@@ -45,7 +163,7 @@ class TestCase:
         )
     @staticmethod
-    def _from_api_test_case(api_test_case: models.V1alphaTestCase) -> "TestCase":
+    def _from_api_test_case(api_test_case: models.V1TestCase) -> "TestCase":
         return TestCase(
             key=api_test_case.name or "",
             prompt=api_test_case.prompt or "",
@@ -81,6 +199,7 @@ class Test:
             self._test_api = api.TestServiceApi(self._client)
             self._test_case_api = api.TestCaseServiceApi(self._client)
             self._document_api = api.DocumentServiceApi(self._client)
+            self._operation_api = api.OperationServiceApi(self._client)
     @property
     def test_cases(self) -> List[TestCase]:
@@ -146,6 +265,86 @@ class Test:
         resp = self._test_api.test_service_perturb_test(self.key, req)
         return Test._from_api_test(resp.test, self._client)
+    def generate_test_cases(
+        self,
+        count: int,
+        model: Optional[str] = None,
+        base_llm_model: Optional[str] = None,
+        generators: Optional[List[TestCaseGenerator]] = None,
+    ) -> "TestCaseGenerationHandle":
+        """Starts generation of test cases from the documents of the Test.
+
+        Args:
+            count (int): Number of test cases to generate (generator may return
+                fewer prompts).
+            model (str): Model to use for generating the prompts.
+            base_llm_model (str): Base LLM model to use for generating the prompts.
+            generators (List[TestCaseGenerator]): Methods to use for generation.
+
+        Returns:
+            TestCaseGenerationHandle: Handle built from the API response.
+        """
+        # Empty or missing values are sent as None rather than as empty values.
+        api_generators = None
+        if generators:
+            api_generators = [generator.to_api_proto() for generator in generators]
+
+        request = models.TestServiceGenerateTestCasesRequest(
+            count=count,
+            model=model or None,
+            base_llm_model=base_llm_model or None,
+            generators=api_generators,
+        )
+        response = self._test_api.test_service_generate_test_cases(self.key, request)
+        return TestCaseGenerationHandle._from_operation(response)
+    def wait_for_test_case_generation(
+        self,
+        handle: TestCaseGenerationHandle,
+        timeout: Optional[float] = None,
+        verbose: bool = False,
+    ) -> TestCaseGenerationHandle:
+        """Waits for the test case generation to finish.
+
+        Polls the operation behind the handle with a growing delay between
+        polls until it reports `done` or the timeout elapses.
+
+        Args:
+            handle (TestCaseGenerationHandle): Handle of the test case generation.
+            timeout (float): The maximum time to wait in seconds. A missing or
+                zero value falls back to 2 days.
+            verbose (bool): If True, prints the status of the handle while waiting.
+
+        Returns:
+            TestCaseGenerationHandle: The latest handle, with `done` set.
+
+        Raises:
+            ValueError: If the handle has no name, or there is no client to
+                poll with.
+            TimeoutError: If the generation does not finish within the timeout.
+        """
+        if not handle.name:
+            raise ValueError("Test case generation handle is not valid.")
+        if handle.done:
+            return handle
+
+        if verbose:
+            print(f"Waiting for test case generation to finish ({handle.name}):")
+
+        if not self._client:
+            raise ValueError("Cannot establish connection to Eval Studio host.")
+
+        # Keep the operation name separately: a poll response without an
+        # operation yields a handle whose name is None.
+        operation_name = handle.name
+
+        # exponential backoff
+        wait_time = 1.0
+        wait_coef = 1.6
+        wait_max = 20.0
+        timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
+        # Measure against a monotonic deadline so the time spent in requests
+        # counts too; the former elapsed-time counter was never advanced, which
+        # made the timeout unreachable.
+        deadline = time.monotonic() + timeout
+
+        while True:
+            handle = TestCaseGenerationHandle._from_operation(
+                self._operation_api.operation_service_get_operation(operation_name)
+            )
+
+            if verbose:
+                if handle.progress or handle.progress_message:
+                    progress = int(handle.progress * 100.0) if handle.progress else 0
+                    msg = f"{progress:>2}% - '{handle.progress_message}'"
+                else:
+                    msg = " 0% - 'Initializing'"
+                print(f"  {msg}")
+
+            if handle.done:
+                return handle
+
+            remaining = deadline - time.monotonic()
+            if remaining <= 0:
+                break
+
+            wait_time = min(wait_time * wait_coef, wait_max)
+            # Never sleep past the deadline.
+            time.sleep(min(wait_time, remaining))
+
+        raise TimeoutError("Waiting timeout has been reached.")
     def delete(self, force=False):
         """Deletes the test.
@@ -263,7 +462,7 @@ class Test:
             raise RuntimeError("Failed to unlink the document from the test.") from err
     @staticmethod
-    def _from_api_test(api_test: models.V1alphaTest, client: api.ApiClient) -> "Test":
+    def _from_api_test(api_test: models.V1Test, client: api.ApiClient) -> "Test":
         return Test(
             key=api_test.name or "",
             name=api_test.display_name or "",
@@ -302,7 +501,7 @@ class _Tests:
             documents (optional): List of `Document`s to be attached to the test.
         """
         _documents = [d.key for d in documents] if documents else None
-        test = models.V1alphaTest(
+        test = models.V1Test(
             display_name=name, description=description, documents=_documents
         )
         res = self._api.test_service_create_test(test)
@@ -319,6 +518,24 @@ class _Tests:
         """
         self._api.test_service_delete_test(key)
+    def get(self, key: str) -> Test:
+        """Gets the test with the given resource name.
+
+        Args:
+            key (str): Resource name of the test to retrieve.
+
+        Returns:
+            An instance of the retrieved `Test`.
+
+        Raises:
+            KeyError: If the test with the given key does not exist.
+        """
+        api_test = self._api.test_service_get_test(key)
+        # An empty response or a response without a test is reported as a miss.
+        if not api_test or not api_test.test:
+            raise KeyError(f"Test with key '{key}' does not exist.")
+        return Test._from_api_test(api_test.test, self._client)
     def import_test_suite(
         self, test_suite: str, name_prefix: Optional[str] = None
     ) -> List[Test]:
@@ -328,7 +545,7 @@ class _Tests:
             test_suite (str): JSON string of the test suite.
             name_prefix (str): Optional prefix to name the imported tests.
         """
-        req = models.V1alphaBatchImportTestsRequest(
+        req = models.V1BatchImportTestsRequest(
             testsJson=test_suite, testDisplayNamePrefix=name_prefix or None
         )
         res = self._api.test_service_batch_import_tests(req)
@@ -360,9 +577,9 @@ class _PerturbatorConfiguration:
             perturbator.params if isinstance(perturbator, p10s.Perturbator) else None
         )
-    def to_api_proto(self) -> models.V1alphaPerturbatorConfiguration:
+    def to_api_proto(self) -> models.V1PerturbatorConfiguration:
         """Converts the client PerturbatorConfiguration to an API PerturbatorConfiguration."""
-        return models.V1alphaPerturbatorConfiguration(
+        return models.V1PerturbatorConfiguration(
             name=self.name,
             intensity=self.intensity.to_api_proto(),
             params=json.dumps(self.params) if self.params else None,

eval-studio-client 0.8.0a2__py3-none-any.whl → 0.8.2__py3-none-any.whl

eval-studio-client 0.8.0a2__py3-none-any.whl → 0.8.2__py3-none-any.whl