PyPI - eval-studio-client - Versions diffs - 0.8.0a2__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

eval-studio-client 0.8.0a2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (574) hide show

eval_studio_client/tests.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import dataclasses
 import datetime
+import enum
 import json
+import time
+from typing import Any
 from typing import List
 from typing import Optional
 from typing import Union
@@ -8,9 +11,113 @@ from typing import Union
 from eval_studio_client import api
 from eval_studio_client import documents as d7s
 from eval_studio_client import perturbators as p10s
+from eval_studio_client import utils
 from eval_studio_client.api import models
+class TestCaseGenerator(enum.Enum):
+    """Methods used for test case generation."""
+    unspecified = models.V1TestCasesGenerator.TEST_CASES_GENERATOR_UNSPECIFIED
+    simple_factual_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_SIMPLE_FACTUAL_QUESTIONS
+    )
+    multi_hop_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_MULTI_HOP_QUESTIONS
+    )
+    inference_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_INFERENCE_QUESTIONS
+    )
+    numerical_reasoning_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_NUMERICAL_REASONING_QUESTIONS
+    )
+    ambiguity_handling_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_AMBIGUITY_HANDLING_QUESTIONS
+    )
+    negation_and_contradiction_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_NEGATION_AND_CONTRADICTION_QUESTIONS
+    )
+    temporal_reasoning_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_TEMPORAL_REASONING_QUESTIONS
+    )
+    out_of_scope_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_OUT_OF_SCOPE_QUESTIONS
+    )
+    yes_or_no_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_YES_OR_NO_QUESTIONS
+    )
+    multiple_choice_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_MULTIPLE_CHOICE_QUESTIONS
+    )
+    demographic_representation_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_DEMOGRAPHIC_REPRESENTATION_QUESTIONS
+    )
+    sentiment_variation_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_SENTIMENT_VARIATION_QUESTIONS
+    )
+    irrelevant_information_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_IRRELEVANT_INFORMATION_QUESTIONS
+    )
+    deliberately_misleading_questions = (
+        models.V1TestCasesGenerator.TEST_CASES_GENERATOR_DELIBERATELY_MISLEADING_QUESTIONS
+    )
+    def to_api_proto(self) -> models.V1TestCasesGenerator:
+        """Converts the client TestCaseGenerator to an API TestCaseGeneration."""
+        proto_values = {
+            TestCaseGenerator.unspecified: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_UNSPECIFIED,
+            TestCaseGenerator.simple_factual_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_SIMPLE_FACTUAL_QUESTIONS,
+            TestCaseGenerator.multi_hop_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_MULTI_HOP_QUESTIONS,
+            TestCaseGenerator.inference_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_INFERENCE_QUESTIONS,
+            TestCaseGenerator.numerical_reasoning_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_NUMERICAL_REASONING_QUESTIONS,
+            TestCaseGenerator.ambiguity_handling_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_AMBIGUITY_HANDLING_QUESTIONS,
+            TestCaseGenerator.negation_and_contradiction_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_NEGATION_AND_CONTRADICTION_QUESTIONS,
+            TestCaseGenerator.temporal_reasoning_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_TEMPORAL_REASONING_QUESTIONS,
+            TestCaseGenerator.out_of_scope_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_OUT_OF_SCOPE_QUESTIONS,
+            TestCaseGenerator.yes_or_no_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_YES_OR_NO_QUESTIONS,
+            TestCaseGenerator.multiple_choice_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_MULTIPLE_CHOICE_QUESTIONS,
+            TestCaseGenerator.demographic_representation_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_DEMOGRAPHIC_REPRESENTATION_QUESTIONS,
+            TestCaseGenerator.sentiment_variation_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_SENTIMENT_VARIATION_QUESTIONS,
+            TestCaseGenerator.irrelevant_information_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_IRRELEVANT_INFORMATION_QUESTIONS,
+            TestCaseGenerator.deliberately_misleading_questions: models.V1TestCasesGenerator.TEST_CASES_GENERATOR_DELIBERATELY_MISLEADING_QUESTIONS,
+        }
+        return proto_values[self]
+@dataclasses.dataclass
+class _TestCaseGenerationHandle:
+    name: Any | None
+    progress: Optional[float] = None
+    progress_message: Optional[str] = None
+    error: Optional[models.RpcStatus] = None
+    done: Optional[bool] = None
+    @staticmethod
+    def _from_operation(
+        res: models.V1GenerateTestCasesResponse | models.V1GetOperationResponse,
+    ) -> "_TestCaseGenerationHandle":
+        """Converts an API operation to prompt generation handle."""
+        op: models.V1Operation | None = res.operation
+        if not op:
+            return _TestCaseGenerationHandle(name=None)
+        # progress
+        if hasattr(op, "metadata") and op.metadata:
+            meta_dict = op.metadata.to_dict() or {}
+        else:
+            meta_dict = {}
+        return _TestCaseGenerationHandle(
+            name=op.name,
+            progress=meta_dict.get("progress"),
+            progress_message=meta_dict.get("progressMessage"),
+            error=op.error,
+            done=op.done,
+        )
 @dataclasses.dataclass
 class TestCase:
     """Represents a single test case, which contains tested prompt, expected answer
@@ -35,9 +142,9 @@ class TestCase:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
-    def to_api_proto(self) -> models.V1alphaTestCase:
+    def to_api_proto(self) -> models.V1TestCase:
         """Converts the client TestCase to an API TestCase."""
-        return models.V1alphaTestCase(
+        return models.V1TestCase(
             prompt=self.prompt,
             answer=self.answer,
             constraints=self.constraints,
@@ -45,7 +152,7 @@ class TestCase:
         )
     @staticmethod
-    def _from_api_test_case(api_test_case: models.V1alphaTestCase) -> "TestCase":
+    def _from_api_test_case(api_test_case: models.V1TestCase) -> "TestCase":
         return TestCase(
             key=api_test_case.name or "",
             prompt=api_test_case.prompt or "",
@@ -75,12 +182,14 @@ class Test:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
     _client: Optional[api.ApiClient] = None
+    _gen_tc_op_name: Optional[str] = None
     def __post_init__(self):
         if self._client:
             self._test_api = api.TestServiceApi(self._client)
             self._test_case_api = api.TestCaseServiceApi(self._client)
             self._document_api = api.DocumentServiceApi(self._client)
+            self._operation_api = api.OperationServiceApi(self._client)
     @property
     def test_cases(self) -> List[TestCase]:
@@ -146,6 +255,93 @@ class Test:
         resp = self._test_api.test_service_perturb_test(self.key, req)
         return Test._from_api_test(resp.test, self._client)
+    def generate_test_cases(
+        self,
+        count: int,
+        model: Optional[str] = None,
+        base_llm_model: Optional[str] = None,
+        generators: Optional[List[TestCaseGenerator]] = None,
+        existing_collection: Optional[str] = None,
+    ) -> None:
+        """Generates test cases based on the documents of the Test.
+        Args:
+            count (int): Number of test cases to generate (generator may return fewer
+               prompts).
+            model (str): Model to use for generating the prompts.
+            base_llm_model (str): Base LLM model to use for generating the prompts.
+            generators (List[TestCaseGenerator]): Methods to use for generation.
+            existing_collection (str): ID or the resource name of the existing
+                collection, from which prompts will be generated.
+                NOTE: This option works only for the H2OGPTe model host ATM.
+        """
+        req = models.TestServiceGenerateTestCasesRequest(
+            count=count,
+            model=model or None,
+            base_llm_model=base_llm_model or None,
+            generators=[g.to_api_proto() for g in generators] if generators else None,
+            h2ogpte_collection_id=existing_collection or None,
+        )
+        res = self._test_api.test_service_generate_test_cases(self.key, req)
+        op: models.V1Operation | None = res.operation
+        self._gen_tc_op_name = op.name if op else None
+    def wait_for_test_case_generation(
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
+        """Waits for the test case generation to finish.
+        Args:
+            timeout (float): The maximum time to wait in seconds.
+            verbose (bool): If True, prints the status of the handle while waiting.
+        """
+        if not self._gen_tc_op_name:
+            raise ValueError(
+                "There is no ongoing test case generation - the operation name is not "
+                "set."
+            )
+        if verbose:
+            print(
+                f"Waiting for test case generation to finish ({self._gen_tc_op_name}):"
+            )
+        if self._client:
+            # exponential backoff
+            wait_time = 1.0
+            wait_coef = 1.6
+            wait_max = 8.0
+            wait_total = 0.0
+            timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
+            progress_bar = utils.ProgressBar()
+            while wait_total < timeout:
+                handle = _TestCaseGenerationHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._gen_tc_op_name
+                    )
+                )
+                if verbose:
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
+                if handle.done:
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Test case generation failed: {handle.error}"
+                        )
+                    return
+                wait_time *= wait_coef
+                time.sleep(min(wait_time, wait_max))
+        else:
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
+        raise TimeoutError("Waiting timeout has been reached.")
     def delete(self, force=False):
         """Deletes the test.
@@ -263,7 +459,7 @@ class Test:
             raise RuntimeError("Failed to unlink the document from the test.") from err
     @staticmethod
-    def _from_api_test(api_test: models.V1alphaTest, client: api.ApiClient) -> "Test":
+    def _from_api_test(api_test: models.V1Test, client: api.ApiClient) -> "Test":
         return Test(
             key=api_test.name or "",
             name=api_test.display_name or "",
@@ -302,7 +498,7 @@ class _Tests:
             documents (optional): List of `Document`s to be attached to the test.
         """
         _documents = [d.key for d in documents] if documents else None
-        test = models.V1alphaTest(
+        test = models.V1Test(
             display_name=name, description=description, documents=_documents
         )
         res = self._api.test_service_create_test(test)
@@ -319,6 +515,24 @@ class _Tests:
         """
         self._api.test_service_delete_test(key)
+    def get(self, key: str) -> Test:
+        """Get the test with given resource name.
+        Args:
+            key (str): Resource name of the test to be get.
+        Returns:
+            An instance of the retrieved `Test`.
+        Raises:
+            KeyError: If the test with the given key does not exist.
+        """
+        api_test = self._api.test_service_get_test(key)
+        if not api_test or not api_test.test:
+            raise KeyError(f"Test with key '{key}' does not exist.")
+        return Test._from_api_test(api_test.test, self._client)
     def import_test_suite(
         self, test_suite: str, name_prefix: Optional[str] = None
     ) -> List[Test]:
@@ -328,7 +542,7 @@ class _Tests:
             test_suite (str): JSON string of the test suite.
             name_prefix (str): Optional prefix to name the imported tests.
         """
-        req = models.V1alphaBatchImportTestsRequest(
+        req = models.V1BatchImportTestsRequest(
             testsJson=test_suite, testDisplayNamePrefix=name_prefix or None
         )
         res = self._api.test_service_batch_import_tests(req)
@@ -360,9 +574,9 @@ class _PerturbatorConfiguration:
             perturbator.params if isinstance(perturbator, p10s.Perturbator) else None
         )
-    def to_api_proto(self) -> models.V1alphaPerturbatorConfiguration:
+    def to_api_proto(self) -> models.V1PerturbatorConfiguration:
         """Converts the client PerturbatorConfiguration to an API PerturbatorConfiguration."""
-        return models.V1alphaPerturbatorConfiguration(
+        return models.V1PerturbatorConfiguration(
             name=self.name,
             intensity=self.intensity.to_api_proto(),
             params=json.dumps(self.params) if self.params else None,

eval_studio_client/utils.py ADDED Viewed

@@ -0,0 +1,26 @@
+from typing import Optional
+class ProgressBar:
+    def __init__(self):
+        self.progress = 0.0
+        self.progress_message = "Initializing"
+        self._progress_max = 1.0
+    def update(self, progress: float, message: Optional[str] = None):
+        try:
+            self.progress = float(str(progress))
+        except ValueError:
+            self.progress = 0.0
+        if message:
+            self.progress_message = message or ""
+        self.print()
+    def print(self):
+        print(" " * len(self.progress_message), end="\r")
+        p_progress = int(self.progress / self._progress_max * 100)
+        p_hashes = p_progress // 5
+        p_msg = f"  {p_progress:>3}% |{'#' * p_hashes:<20}| {self.progress_message}"
+        print(p_msg, end="\r")

{eval_studio_client-0.8.0a2.dist-info → eval_studio_client-1.0.0.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.3
 Name: eval-studio-client
-Version: 0.8.0a2
+Version: 1.0.0
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
-License-Expression: MIT
+License: MIT
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.9

eval-studio-client 0.8.0a2__py3-none-any.whl → 1.0.0__py3-none-any.whl

eval-studio-client 0.8.0a2__py3-none-any.whl → 1.0.0__py3-none-any.whl