PyPI - eval-studio-client - Versions diffs - 1.0.0a1__py3-none-any.whl → 1.1.0a5__py3-none-any.whl - Mend

eval-studio-client 1.0.0a1__py3-none-any.whl → 1.1.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (575) hide show

eval_studio_client/tests.py CHANGED Viewed

@@ -11,7 +11,11 @@ from typing import Union
 from eval_studio_client import api
 from eval_studio_client import documents as d7s
 from eval_studio_client import perturbators as p10s
+from eval_studio_client import utils
 from eval_studio_client.api import models
+from eval_studio_client.api.models import (
+    test_service_clone_test_request as clone_test_request,
+)
 class TestCaseGenerator(enum.Enum):
@@ -85,15 +89,8 @@ class TestCaseGenerator(enum.Enum):
 @dataclasses.dataclass
-class TestCaseGenerationHandle:
+class _TestCaseGenerationHandle:
     name: Any | None
-    create_time: Optional[datetime.datetime] = None
-    creator: Optional[str] = None
-    update_time: Optional[datetime.datetime] = None
-    updater: Optional[str] = None
-    delete_time: Optional[datetime.datetime] = None
-    deleter: Optional[str] = None
     progress: Optional[float] = None
     progress_message: Optional[str] = None
     error: Optional[models.RpcStatus] = None
@@ -102,11 +99,39 @@ class TestCaseGenerationHandle:
     @staticmethod
     def _from_operation(
         res: models.V1GenerateTestCasesResponse | models.V1GetOperationResponse,
-    ) -> "TestCaseGenerationHandle":
+    ) -> "_TestCaseGenerationHandle":
         """Converts an API operation to prompt generation handle."""
         op: models.V1Operation | None = res.operation
         if not op:
-            return TestCaseGenerationHandle(name=None)
+            return _TestCaseGenerationHandle(name=None)
+        # progress
+        if hasattr(op, "metadata") and op.metadata:
+            meta_dict = op.metadata.to_dict() or {}
+        else:
+            meta_dict = {}
+        return _TestCaseGenerationHandle(
+            name=op.name,
+            progress=meta_dict.get("progress"),
+            progress_message=meta_dict.get("progressMessage"),
+            error=op.error,
+            done=op.done,
+        )
+@dataclasses.dataclass
+class _TestCaseLibraryGetHandle(_TestCaseGenerationHandle):
+    @staticmethod
+    def _from_operation(
+        res: (
+            models.V1ImportTestCasesFromLibraryResponse | models.V1GetOperationResponse
+        ),
+    ) -> "_TestCaseLibraryGetHandle":
+        """Converts an API operation to prompt library handle."""
+        op: models.V1Operation | None = res.operation
+        if not op:
+            return _TestCaseLibraryGetHandle(name=None)
         # progress
         if hasattr(op, "metadata") and op.metadata:
@@ -114,14 +139,8 @@ class TestCaseGenerationHandle:
         else:
             meta_dict = {}
-        return TestCaseGenerationHandle(
+        return _TestCaseLibraryGetHandle(
             name=op.name,
-            create_time=op.create_time,
-            creator=op.creator,
-            update_time=op.update_time,
-            updater=op.updater,
-            delete_time=op.delete_time,
-            deleter=op.deleter,
             progress=meta_dict.get("progress"),
             progress_message=meta_dict.get("progressMessage"),
             error=op.error,
@@ -129,6 +148,42 @@ class TestCaseGenerationHandle:
         )
+@dataclasses.dataclass
+class TestCaseLibraryItem:
+    """Represents a single test case library item - test suite."""
+    key: str
+    name: str
+    description: str
+    test_suite_url: str
+    test_count: int
+    test_case_count: int
+    evaluates: List[str]
+    categories: List[str]
+    @staticmethod
+    def _from_api_items(
+        api_items: List[models.V1PromptLibraryItem],
+    ) -> List["TestCaseLibraryItem"]:
+        return (
+            [
+                TestCaseLibraryItem(
+                    key=api_item.name or "",
+                    name=api_item.display_name or "",
+                    description=api_item.description or "",
+                    test_suite_url=api_item.test_suite_url or "",
+                    test_count=api_item.test_count or 0,
+                    test_case_count=api_item.test_case_count or 0,
+                    evaluates=list(api_item.evaluates) if api_item.evaluates else [],
+                    categories=list(api_item.categories) if api_item.categories else [],
+                )
+                for api_item in api_items
+            ]
+            if api_items
+            else []
+        )
 @dataclasses.dataclass
 class TestCase:
     """Represents a single test case, which contains tested prompt, expected answer
@@ -173,6 +228,31 @@ class TestCase:
         )
+@dataclasses.dataclass
+class TestCaseRelationship:
+    source_test_case_key: str
+    target_test_case_key: str
+    relationship_type: str
+    def to_api_proto(self) -> models.V1TestCaseRelationship:
+        """Converts the client TestCase to an API TestCase."""
+        return models.V1TestCaseRelationship(
+            source=self.source_test_case_key,
+            target=self.target_test_case_key,
+            type=self.relationship_type,
+        )
+    @staticmethod
+    def _from_api_test_case_rel(
+        api_test_case_rel: models.V1TestCaseRelationship,
+    ) -> "TestCaseRelationship":
+        return TestCaseRelationship(
+            source_test_case_key=api_test_case_rel.source or "",
+            target_test_case_key=api_test_case_rel.target or "",
+            relationship_type=api_test_case_rel.type or "",
+        )
 @dataclasses.dataclass
 class Test:
     """Represents a test, which contains a set of test cases and optionally
@@ -193,6 +273,8 @@ class Test:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
     _client: Optional[api.ApiClient] = None
+    _gen_tc_op_name: Optional[str] = None
+    _lib_tc_op_name: Optional[str] = None
     def __post_init__(self):
         if self._client:
@@ -200,6 +282,7 @@ class Test:
             self._test_case_api = api.TestCaseServiceApi(self._client)
             self._document_api = api.DocumentServiceApi(self._client)
             self._operation_api = api.OperationServiceApi(self._client)
+            self._relationships_api = api.TestCaseRelationshipServiceApi(self._client)
     @property
     def test_cases(self) -> List[TestCase]:
@@ -210,6 +293,19 @@ class Test:
         return []
+    @property
+    def test_case_relationships(self) -> List[TestCaseRelationship]:
+        """Retrieves all relationships among test cases of the test."""
+        r_a = self._relationships_api
+        res = r_a.test_case_relationship_service_list_test_case_relationships(self.key)
+        if res and res.test_case_relationships:
+            return [
+                TestCaseRelationship._from_api_test_case_rel(r)
+                for r in res.test_case_relationships
+            ]
+        return []
     @property
     def documents(self) -> List[d7s.Document]:
         """Retrieves all documents attached to the test."""
@@ -232,12 +328,12 @@ class Test:
         perturbators: Union[p10s.Perturbator, str, List[Union[p10s.Perturbator, str]]],
         new_test_description: str = "",
     ) -> "Test":
-        """Creates new Test by perturbing this test using the given Perturbators.
+        """Creates new Test by perturbing this test using the given perturbators.
         Args:
             new_test_name (str): Name of the newly created test.
-            perturbators (Perturbator, List[Perturbator], str or List[str]): List of Perturbators or
-                their keys used to perturbate this Test.
+            perturbators (Perturbator, List[Perturbator], str or List[str]): List of
+                perturbators or their keys used to perturbate this Test.
             new_test_description (str): Optional description of the newly created test.
         """
@@ -258,13 +354,46 @@ class Test:
         configs = [_PerturbatorConfiguration(p) for p in perturbators_to_run]
         req = models.TestServicePerturbTestRequest(
-            perturbatorConfigurations=[c.to_api_proto() for c in configs],
-            newTestDisplayName=new_test_name,
-            newTestDescription=new_test_description,
+            perturbator_configurations=[c.to_api_proto() for c in configs],
+            new_test_display_name=new_test_name,
+            new_test_description=new_test_description,
         )
         resp = self._test_api.test_service_perturb_test(self.key, req)
         return Test._from_api_test(resp.test, self._client)
+    def perturb_in_place(
+        self,
+        perturbators: Union[p10s.Perturbator, str, List[Union[p10s.Perturbator, str]]],
+        test_case_names: Optional[List[str]] = None,
+    ) -> str:
+        """In-place (in-test) perturbation of test cases using the given perturbators.
+        Args:
+            perturbators (Perturbator, List[Perturbator], str or List[str]): List of
+                perturbators or their keys used to perturbate this Test.
+            test_case_names (List[str]): List of test case names to perturbate.
+        """
+        if self._client is None:
+            raise RuntimeError("Client is not set.")
+        if not perturbators:
+            raise ValueError("Perturbators must be provided.")
+        if isinstance(perturbators, (p10s.Perturbator, str)):
+            perturbators_to_run = [perturbators]
+        else:
+            perturbators_to_run = perturbators
+        configs = [_PerturbatorConfiguration(p) for p in perturbators_to_run]
+        req = models.TestServicePerturbTestInPlaceRequest(
+            perturbator_configurations=[c.to_api_proto() for c in configs],
+            test_case_names=test_case_names,
+        )
+        resp = self._test_api.test_service_perturb_test(self.key, req)
+        return resp.test.name
     def generate_test_cases(
         self,
         count: int,
@@ -272,12 +401,12 @@ class Test:
         base_llm_model: Optional[str] = None,
         generators: Optional[List[TestCaseGenerator]] = None,
         existing_collection: Optional[str] = None,
-    ) -> "TestCaseGenerationHandle":
+    ) -> None:
         """Generates test cases based on the documents of the Test.
         Args:
             count (int): Number of test cases to generate (generator may return fewer
-               prompts).
+                prompts).
             model (str): Model to use for generating the prompts.
             base_llm_model (str): Base LLM model to use for generating the prompts.
             generators (List[TestCaseGenerator]): Methods to use for generation.
@@ -296,28 +425,28 @@ class Test:
         res = self._test_api.test_service_generate_test_cases(self.key, req)
-        return TestCaseGenerationHandle._from_operation(res)
+        op: models.V1Operation | None = res.operation
+        self._gen_tc_op_name = op.name if op else None
     def wait_for_test_case_generation(
-        self,
-        handle: TestCaseGenerationHandle,
-        timeout: Optional[float] = None,
-        verbose: bool = False,
-    ) -> TestCaseGenerationHandle:
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
         """Waits for the test case generation to finish.
         Args:
-            handle (TestCaseGenerationHandle): Handle of the test case generation.
             timeout (float): The maximum time to wait in seconds.
             verbose (bool): If True, prints the status of the handle while waiting.
         """
-        if not handle.name:
-            raise ValueError("Test case generation handle is not valid.")
-        elif handle.done:
-            return handle
+        if not self._gen_tc_op_name:
+            raise ValueError(
+                "There is no ongoing test case generation - the operation name is not "
+                "set."
+            )
         if verbose:
-            print(f"Waiting for test case generation to finish ({handle.name}):")
+            print(
+                f"Waiting for test case generation to finish ({self._gen_tc_op_name}):"
+            )
         if self._client:
             # exponential backoff
             wait_time = 1.0
@@ -325,37 +454,151 @@ class Test:
             wait_max = 8.0
             wait_total = 0.0
             timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
-            # progress
-            p_max = 1.0
-            p_msg = ""
+            progress_bar = utils.ProgressBar()
             while wait_total < timeout:
-                handle = TestCaseGenerationHandle._from_operation(
-                    self._operation_api.operation_service_get_operation(handle.name)
+                handle = _TestCaseGenerationHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._gen_tc_op_name
+                    )
                 )
                 if verbose:
-                    print(" " * len(p_msg), end="\r")
-                    if handle.progress or handle.progress_message:
-                        try:
-                            h_progress = float(str(handle.progress))
-                        except ValueError:
-                            h_progress = 0.0
-                        h_msg = handle.progress_message or "Processing"
-                    else:
-                        h_progress = 0.0
-                        h_msg = "Initializing"
-                    p_progress = int(h_progress / p_max * 100)
-                    p_hashes = p_progress // 5
-                    p_msg = f"  {p_progress:>3}% |{'#' * p_hashes:<20}| {h_msg}"
-                    print(p_msg, end="\r")
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
                 if handle.done:
-                    return handle
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Test case generation failed: {handle.error}"
+                        )
+                    return
                 wait_time *= wait_coef
                 time.sleep(min(wait_time, wait_max))
         else:
-            raise ValueError("Cannot establish connection to Eval Studio host.")
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
+        raise TimeoutError("Waiting timeout has been reached.")
+    def list_test_suite_library_items(
+        self,
+        filter_by_categories: Optional[List[str]] = None,
+        filter_by_purposes: Optional[List[str]] = None,
+        filter_by_evaluates: Optional[List[str]] = None,
+        filter_by_origin: Optional[str] = None,
+        filter_by_test_case_count: Optional[int] = None,
+        filter_by_test_count: Optional[int] = None,
+        filter_by_fts: Optional[str] = None,
+    ) -> List[TestCaseLibraryItem]:
+        """Retrieves a list of all available items - suites of tests - in the library.
+        Args:
+            filter_by_categories (List[str]): List of categories to filter
+                the library items.
+            filter_by_purposes (List[str]): List of purposes to filter
+                the library items.
+            filter_by_evaluates (List[str]): List of evaluates to filter
+                the library items.
+            filter_by_origin (str): Origin to filter the library items.
+            filter_by_test_case_count (int): Test case count to filter
+                the library items.
+            filter_by_test_count (int): Test count to filter the library items.
+            filter_by_fts (str): FTS to filter the library items - phrase to search for.
+        Returns:
+            List[TestCaseLibraryItem]: List of library items.
+        """
+        req = models.TestServiceListTestCaseLibraryItemsRequest(
+            filter_by_categories=filter_by_categories,
+            filter_by_purposes=filter_by_purposes,
+            filter_by_evaluates=filter_by_evaluates,
+            filter_by_origin=filter_by_origin,
+            filter_by_test_case_count=filter_by_test_case_count,
+            filter_by_test_count=filter_by_test_count,
+            filter_by_fts=filter_by_fts,
+        )
+        res = self._test_api.test_service_list_test_case_library_items(self.key, req)
+        if res and res.prompt_library_items:
+            return TestCaseLibraryItem._from_api_items(res.prompt_library_items)
+        return []
+    def add_library_test_cases(
+        self, test_suite_url: str, count: int, test_document_urls: Optional[List[str]]
+    ) -> None:
+        """Sample test cases from the test suite library and add them to the test.
+        Args:
+            test_suite_url (str): The URL of the library test suite to get TestCases
+                from (sample).
+            count (int): The number of TestCases to get from the library.
+            test_document_urls (List[str]): The list of target Test corpus
+                document URLs to skip when returning library TestCases corpus.
+        """
+        req = models.TestServiceImportTestCasesFromLibraryRequest(
+            test_suite_url=test_suite_url,
+            count=count,
+            test_document_urls=test_document_urls,
+        )
+        res = self._test_api.test_service_import_test_cases_from_library(self.key, req)
+        op: models.V1Operation | None = res.operation
+        self._lib_tc_op_name = op.name if op else None
+    def wait_for_library_test_case_get(
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
+        """Waits for the library test cases(s) sampling  to finish.
+        Args:
+            timeout (float): The maximum time to wait in seconds.
+            verbose (bool): If True, prints the status of the handle while waiting.
+        """
+        if not self._lib_tc_op_name:
+            raise ValueError(
+                "There is no ongoing getting of test case(s) from the library - "
+                "the operation name is not set."
+            )
+        if verbose:
+            print(
+                f"Waiting for getting library test case(s) operation to finish "
+                f"({self._lib_tc_op_name}):"
+            )
+        if self._client:
+            # exponential backoff
+            wait_time = 1.0
+            wait_coef = 1.6
+            wait_max = 8.0
+            wait_total = 0.0
+            timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
+            progress_bar = utils.ProgressBar()
+            while wait_total < timeout:
+                handle = _TestCaseLibraryGetHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._lib_tc_op_name
+                    )
+                )
+                if verbose:
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
+                if handle.done:
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Getting of library test case(s) failed: {handle.error}"
+                        )
+                    return
+                wait_time *= wait_coef
+                time.sleep(min(wait_time, wait_max))
+        else:
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
         raise TimeoutError("Waiting timeout has been reached.")
@@ -524,6 +767,28 @@ class _Tests:
         return None
+    def clone(
+        self, key: str, name: Optional[str] = "", description: Optional[str] = ""
+    ) -> Optional[Test]:
+        """Clone an existing test in the Eval Studio.
+        Args:
+            key (str): Resource name of the test to be cloned.
+            name (str): Optional new name of the cloned test.
+            description (str): Optional new description of the cloned test.
+        """
+        res = self._api.test_service_clone_test(
+            key,
+            body=clone_test_request.TestServiceCloneTestRequest(
+                new_test_display_name=name, new_test_description=description
+            ),
+        )
+        if res and res.test:
+            return Test._from_api_test(res.test, self._client)
+        return None
     def delete(self, key: str):
         """Deletes the test with given resource name.

eval_studio_client/utils.py ADDED Viewed

@@ -0,0 +1,26 @@
+from typing import Optional
+class ProgressBar:
+    def __init__(self):
+        self.progress = 0.0
+        self.progress_message = "Initializing"
+        self._progress_max = 1.0
+    def update(self, progress: float, message: Optional[str] = None):
+        try:
+            self.progress = float(str(progress))
+        except ValueError:
+            self.progress = 0.0
+        if message:
+            self.progress_message = message or ""
+        self.print()
+    def print(self):
+        print(" " * len(self.progress_message), end="\r")
+        p_progress = int(self.progress / self._progress_max * 100)
+        p_hashes = p_progress // 5
+        p_msg = f"  {p_progress:>3}% |{'#' * p_hashes:<20}| {self.progress_message}"
+        print(p_msg, end="\r")

{eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.1.0a5.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,9 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: eval-studio-client
-Version: 1.0.0a1
+Version: 1.1.0a5
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
-License: MIT
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.9

eval-studio-client 1.0.0a1__py3-none-any.whl → 1.1.0a5__py3-none-any.whl

eval-studio-client 1.0.0a1__py3-none-any.whl → 1.1.0a5__py3-none-any.whl