eval-studio-client 1.0.0a1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- eval_studio_client/dashboards.py +37 -18
- eval_studio_client/leaderboards.py +2 -0
- eval_studio_client/tests.py +33 -50
- eval_studio_client/utils.py +26 -0
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/METADATA +3 -3
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/RECORD +7 -6
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/WHEEL +1 -1
eval_studio_client/dashboards.py
CHANGED
@@ -9,6 +9,7 @@ from eval_studio_client import api
 from eval_studio_client import insights as i6s
 from eval_studio_client import leaderboards as l10s
 from eval_studio_client import problems as p6s
+from eval_studio_client import utils
 from eval_studio_client.api import models
 
 
@@ -41,6 +42,7 @@ class Dashboard:
         self._dashboard_api = api.DashboardServiceApi(self._client)
         self._leaderboard_api = api.LeaderboardServiceApi(self._client)
         self._info_api = api.InfoServiceApi(self._client)
+        self._operation_api = api.OperationServiceApi(self._client)
 
     @property
     def leaderboards(self) -> Optional[List[l10s.Leaderboard]]:
@@ -118,36 +120,56 @@
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
-    def wait_to_finish(self, timeout: Optional[float] = None):
+    def wait_to_finish(self, timeout: Optional[float] = None, verbose: bool = False):
         """Waits for the dashboard to finish.
 
         Args:
             timeout: The maximum time to wait in seconds.
+            verbose (bool): If True, prints the status of the evaluation while waiting.
         """
         timeout = timeout or float("inf")
+        progress_bar = utils.ProgressBar()
         if self.finished:
             return
 
+        if not self._create_operation:
+            # This means that the evaluation has no assigned operation, thus cannot poll.
+            raise RuntimeError("Failed to retrieve running evaluation info.")
+
         if self._client:
             ctr = 0
             while ctr < timeout:
-
-
-
-
-
-
-
+                op = self._operation_api.operation_service_get_operation(
+                    self._create_operation
+                )
+                if not op or not op.operation:
+                    raise RuntimeError(
+                        "Failed to retrieve running evaluation progress."
+                    )
+
+                if verbose:
+                    if not op.operation.metadata:
+                        raise RuntimeError(
+                            "Failed to retrieve running evaluation progress details."
+                        )
+
+                    op_meta = op.operation.metadata.to_dict()
+                    progress = op_meta.get("progress", 0)
+                    progress_msg = op_meta.get("progressMessage", "Running")
+                    progress_bar.update(progress, progress_msg)
+
+                if op.operation.done:
+                    return
+
+                ctr += 1
+                time.sleep(1)
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
         raise TimeoutError("Waiting timeout has been reached.")
 
-    def show(self):
-        """
-
-        NOTE: This functionality is primarily for interactive use in Jupyter notebooks.
-        """
+    def show(self) -> str:
+        """Prints the endpoint URL of the evaluation dashboard."""
         if self._client:
             info_res = self._info_api.info_service_get_info()
             if not info_res or not info_res.info:
@@ -155,11 +177,8 @@
 
             host = info_res.info.base_url
             url = urllib.parse.urljoin(host, self.key)
-
-
-            import webbrowser
-
-            webbrowser.open(url)
+            print(f"Open following url to access evaluation dashboard: \n\n{url}")
+            return url
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
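Usage note: taken together, the dashboards.py changes replace the old webbrowser-based show() with one that prints and returns the dashboard URL, and teach wait_to_finish() to poll the create operation, optionally drawing a textual progress bar. A minimal sketch of the new surface (the `dashboard` variable is hypothetical; constructing a Dashboard is outside this diff):

    # Sketch only: assumes `dashboard` is a Dashboard backed by a connected api.ApiClient.
    dashboard.wait_to_finish(timeout=600, verbose=True)  # polls roughly once per second, drawing a ProgressBar
    url = dashboard.show()  # prints the dashboard URL and returns it; no browser is opened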
eval_studio_client/leaderboards.py
CHANGED
@@ -39,6 +39,7 @@ class Leaderboard:
     _model_name: Optional[str] = None
     _status: Optional[models.V1LeaderboardStatus] = None
     _client: Optional[api.ApiClient] = None
+    _operation: Optional[str] = None
 
     def __post_init__(self):
         self._evaluator_api = api.EvaluatorServiceApi(self._client)
@@ -198,6 +199,7 @@ class Leaderboard:
             _leaderboard=api_leaderboard.leaderboard_table,
             _status=api_leaderboard.status,
             _client=client,
+            _operation=api_leaderboard.create_operation or None,
         )
 
     @staticmethod
eval_studio_client/tests.py
CHANGED
@@ -11,6 +11,7 @@ from typing import Union
 from eval_studio_client import api
 from eval_studio_client import documents as d7s
 from eval_studio_client import perturbators as p10s
+from eval_studio_client import utils
 from eval_studio_client.api import models
 
 
@@ -85,15 +86,9 @@ class TestCaseGenerator(enum.Enum):
 
 
 @dataclasses.dataclass
-class TestCaseGenerationHandle:
+class _TestCaseGenerationHandle:
 
     name: Any | None
-    create_time: Optional[datetime.datetime] = None
-    creator: Optional[str] = None
-    update_time: Optional[datetime.datetime] = None
-    updater: Optional[str] = None
-    delete_time: Optional[datetime.datetime] = None
-    deleter: Optional[str] = None
     progress: Optional[float] = None
     progress_message: Optional[str] = None
     error: Optional[models.RpcStatus] = None
@@ -102,11 +97,11 @@ class TestCaseGenerationHandle:
     @staticmethod
     def _from_operation(
         res: models.V1GenerateTestCasesResponse | models.V1GetOperationResponse,
-    ) -> "TestCaseGenerationHandle":
+    ) -> "_TestCaseGenerationHandle":
         """Converts an API operation to prompt generation handle."""
        op: models.V1Operation | None = res.operation
         if not op:
-            return
+            return _TestCaseGenerationHandle(name=None)
 
         # progress
         if hasattr(op, "metadata") and op.metadata:
@@ -114,14 +109,8 @@ class TestCaseGenerationHandle:
         else:
             meta_dict = {}
 
-        return TestCaseGenerationHandle(
+        return _TestCaseGenerationHandle(
             name=op.name,
-            create_time=op.create_time,
-            creator=op.creator,
-            update_time=op.update_time,
-            updater=op.updater,
-            delete_time=op.delete_time,
-            deleter=op.deleter,
             progress=meta_dict.get("progress"),
             progress_message=meta_dict.get("progressMessage"),
             error=op.error,
@@ -193,6 +182,7 @@ class Test:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
     _client: Optional[api.ApiClient] = None
+    _gen_tc_op_name: Optional[str] = None
 
     def __post_init__(self):
         if self._client:
@@ -272,7 +262,7 @@
         base_llm_model: Optional[str] = None,
         generators: Optional[List[TestCaseGenerator]] = None,
         existing_collection: Optional[str] = None,
-    ) -> TestCaseGenerationHandle:
+    ) -> None:
         """Generates test cases based on the documents of the Test.
 
         Args:
@@ -296,28 +286,28 @@
 
         res = self._test_api.test_service_generate_test_cases(self.key, req)
 
-
+        op: models.V1Operation | None = res.operation
+        self._gen_tc_op_name = op.name if op else None
 
     def wait_for_test_case_generation(
-        self,
-        handle: TestCaseGenerationHandle,
-        timeout: Optional[float] = None,
-        verbose: bool = False,
-    ) -> TestCaseGenerationHandle:
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
         """Waits for the test case generation to finish.
 
         Args:
-            handle (TestCaseGenerationHandle): Handle of the test case generation.
             timeout (float): The maximum time to wait in seconds.
             verbose (bool): If True, prints the status of the handle while waiting.
         """
-        if not
-            raise ValueError(
-
-
+        if not self._gen_tc_op_name:
+            raise ValueError(
+                "There is no ongoing test case generation - the operation name is not "
+                "set."
+            )
 
         if verbose:
-            print(
+            print(
+                f"Waiting for test case generation to finish ({self._gen_tc_op_name}):"
+            )
         if self._client:
             # exponential backoff
             wait_time = 1.0
@@ -325,37 +315,30 @@
             wait_max = 8.0
             wait_total = 0.0
             timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
-
-            p_max = 1.0
-            p_msg = ""
+            progress_bar = utils.ProgressBar()
             while wait_total < timeout:
-                handle = TestCaseGenerationHandle._from_operation(
-                    self._operation_api.operation_service_get_operation(
+                handle = _TestCaseGenerationHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._gen_tc_op_name
+                    )
                 )
 
                 if verbose:
-
-                    if handle.progress or handle.progress_message:
-                        try:
-                            h_progress = float(str(handle.progress))
-                        except ValueError:
-                            h_progress = 0.0
-                        h_msg = handle.progress_message or "Processing"
-                    else:
-                        h_progress = 0.0
-                        h_msg = "Initializing"
-                    p_progress = int(h_progress / p_max * 100)
-                    p_hashes = p_progress // 5
-                    p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {h_msg}"
-                    print(p_msg, end="\r")
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
 
                 if handle.done:
-
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Test case generation failed: {handle.error}"
+                        )
+                    return
 
                 wait_time *= wait_coef
                 time.sleep(min(wait_time, wait_max))
         else:
-            raise ValueError(
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
 
         raise TimeoutError("Waiting timeout has been reached.")
 
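Usage note: the tests.py changes retire the public TestCaseGenerationHandle. generate_test_cases() now returns None and records the operation name in Test._gen_tc_op_name, which wait_for_test_case_generation() polls with exponential backoff, raising RuntimeError if the finished operation carries an error. A minimal sketch (the `test` variable and the model name are hypothetical; obtaining a Test is outside this diff):

    # Sketch only: assumes `test` is a Test backed by a connected api.ApiClient.
    test.generate_test_cases(base_llm_model="my-llm")  # hypothetical model name; now returns None
    test.wait_for_test_case_generation(timeout=3600, verbose=True)
    # raises ValueError if no generation was started, TimeoutError if the timeout elapses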
eval_studio_client/utils.py
ADDED
@@ -0,0 +1,26 @@
+from typing import Optional
+
+
+class ProgressBar:
+    def __init__(self):
+        self.progress = 0.0
+        self.progress_message = "Initializing"
+        self._progress_max = 1.0
+
+    def update(self, progress: float, message: Optional[str] = None):
+        try:
+            self.progress = float(str(progress))
+        except ValueError:
+            self.progress = 0.0
+
+        if message:
+            self.progress_message = message or ""
+
+        self.print()
+
+    def print(self):
+        print(" " * len(self.progress_message), end="\r")
+        p_progress = int(self.progress / self._progress_max * 100)
+        p_hashes = p_progress // 5
+        p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {self.progress_message}"
+        print(p_msg, end="\r")
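Usage note: ProgressBar is fully shown above, so its behavior follows directly from the code: progress is normalized against _progress_max (1.0), and each update() redraws the bar in place via a carriage return. For example:

    from eval_studio_client import utils

    bar = utils.ProgressBar()
    bar.update(0.25, "Evaluating")  # draws "  25% |#####               | Evaluating"
    bar.update(0.5)                 # keeps the previous message, bar advances to 50%
    bar.update(1.0, "Done")         # draws " 100% |####################| Done"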
{eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/METADATA
CHANGED
@@ -1,10 +1,10 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: eval-studio-client
-Version: 1.0.0a1
+Version: 1.0.2
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
-License: MIT
+License-Expression: MIT
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.9
{eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/RECORD
CHANGED
@@ -1,16 +1,17 @@
 eval_studio_client/__about__.py,sha256=7TnXVu0lNAY4UdQ_2iwTlAENGdigMUVBy6UmtWGB6sQ,30
 eval_studio_client/__init__.py,sha256=v8lXY_l4j3lAbIfW21nZFeWZX0sl4nKHbB29h2qYVU8,207
 eval_studio_client/client.py,sha256=khRFtcFNZHAMe1bA7SyvoLOPHVZQ2XJOZ3UB3gX8EKs,3307
-eval_studio_client/dashboards.py,sha256=
+eval_studio_client/dashboards.py,sha256=TBMiO4OvTnWYSVuj2-EBxSdKQtEAb_HXgc9gXtRnu-s,8381
 eval_studio_client/documents.py,sha256=fjsbHnqZnouu0stCf_p15RgoszkY4_gIsbX1hiw7Xv8,3076
 eval_studio_client/evaluators.py,sha256=blJlWMswIGr1u6TQDiiO-fInYVnkBT0Y02J57o8Z094,2100
 eval_studio_client/insights.py,sha256=bhe6XBVJ61-2bcDdNe6HiZsu0sly8LeoYAKo1GkgK08,1199
-eval_studio_client/leaderboards.py,sha256=
+eval_studio_client/leaderboards.py,sha256=NHko_kuPIXnbBdEDMK1MHQmHJRCHA7_Q1wx4eqBvBF8,8035
 eval_studio_client/models.py,sha256=nW1Wk6L89iWSjhMVk_sKmxSomKX3b6ANALbwWvbJ7Uk,21346
 eval_studio_client/perturbators.py,sha256=CtcWqEgPGpOcDHvYAQBlNDKnS-ZDBkL7Y_Ygsgpvikw,3133
 eval_studio_client/problems.py,sha256=rdGIfo7AqyxGhWMpbIDX1WXFoQvzKktKAWDKRde5VbY,1515
 eval_studio_client/test_labs.py,sha256=IEY98Ocu7WQcxZN_jy5YthVBoHAgHjgA2T93U7q0eYE,11260
-eval_studio_client/tests.py,sha256=
+eval_studio_client/tests.py,sha256=_Qu6X4FoocYJ-liClXLQqIR91P7GjWmxpeyDhRl5JXI,22393
+eval_studio_client/utils.py,sha256=e5bsQVgNHYNSqSOthxlmncerPdgbvWwQaY_C-libuXk,764
 eval_studio_client/api/__init__.py,sha256=Ef5qooH4SLfYUqVBJl79oRKWYnXryDPZV4IXGfvG1Wc,15269
 eval_studio_client/api/api_client.py,sha256=yFQKmCsVhswcTbdGY4lf-61mf8FVm3Kfon8Qhe1sPKw,26431
 eval_studio_client/api/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
@@ -480,6 +481,6 @@ eval_studio_client/api/test/test_v1_update_test_response.py,sha256=pqTwL9SgoOM9k
 eval_studio_client/api/test/test_v1_who_am_i_response.py,sha256=bNbjL5-b-4asyziW6znJhuU2yrzd9RgJa2ZiNw3e6YA,1523
 eval_studio_client/api/test/test_who_am_i_service_api.py,sha256=gYWKFamJMyVne2QaOSPz6WEkxExRuAphMGKf1nFayLU,898
 eval_studio_client/gen/openapiv2/eval_studio.swagger.json,sha256=2jOBBxQ2H2mS9C_nlqoTrTiYMmCLaUFQym6su3fXJ8I,210976
-eval_studio_client-1.0.0a1.dist-info/METADATA,sha256=
-eval_studio_client-1.0.0a1.dist-info/WHEEL,sha256=
-eval_studio_client-1.0.0a1.dist-info/RECORD,,
+eval_studio_client-1.0.2.dist-info/METADATA,sha256=khQkNMvPEvKdkWqjP5c71z-SNmj6ey3cJipV46pq_aE,718
+eval_studio_client-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+eval_studio_client-1.0.2.dist-info/RECORD,,