eval-studio-client 1.0.0a1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- eval_studio_client/dashboards.py
+++ eval_studio_client/dashboards.py
@@ -9,6 +9,7 @@ from eval_studio_client import api
 from eval_studio_client import insights as i6s
 from eval_studio_client import leaderboards as l10s
 from eval_studio_client import problems as p6s
+from eval_studio_client import utils
 from eval_studio_client.api import models
 
 
@@ -41,6 +42,7 @@ class Dashboard:
         self._dashboard_api = api.DashboardServiceApi(self._client)
         self._leaderboard_api = api.LeaderboardServiceApi(self._client)
         self._info_api = api.InfoServiceApi(self._client)
+        self._operation_api = api.OperationServiceApi(self._client)
 
     @property
     def leaderboards(self) -> Optional[List[l10s.Leaderboard]]:
@@ -118,36 +120,56 @@ class Dashboard:
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
-    def wait_to_finish(self, timeout: Optional[float] = None):
+    def wait_to_finish(self, timeout: Optional[float] = None, verbose: bool = False):
         """Waits for the dashboard to finish.
 
         Args:
             timeout: The maximum time to wait in seconds.
+            verbose (bool): If True, prints the status of the evaluation while waiting.
         """
         timeout = timeout or float("inf")
+        progress_bar = utils.ProgressBar()
         if self.finished:
             return
 
+        if not self._create_operation:
+            # This means that the evaluation has no assigned operation, thus cannot poll.
+            raise RuntimeError("Failed to retrieve running evaluation info.")
+
         if self._client:
             ctr = 0
             while ctr < timeout:
-                lbs = self.leaderboards
-                if lbs:
-                    if all(lb.finished for lb in lbs):
-                        return
-
-                ctr += 1
-                time.sleep(1)
+                op = self._operation_api.operation_service_get_operation(
+                    self._create_operation
+                )
+                if not op or not op.operation:
+                    raise RuntimeError(
+                        "Failed to retrieve running evaluation progress."
+                    )
+
+                if verbose:
+                    if not op.operation.metadata:
+                        raise RuntimeError(
+                            "Failed to retrieve running evaluation progress details."
+                        )
+
+                    op_meta = op.operation.metadata.to_dict()
+                    progress = op_meta.get("progress", 0)
+                    progress_msg = op_meta.get("progressMessage", "Running")
+                    progress_bar.update(progress, progress_msg)
+
+                if op.operation.done:
+                    return
+
+                ctr += 1
+                time.sleep(1)
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
         raise TimeoutError("Waiting timeout has been reached.")
 
-    def show(self):
-        """Opens the evaluation in the default web browser.
-
-        NOTE: This functionality is primarily for interactive use in Jupyter notebooks.
-        """
+    def show(self) -> str:
+        """Prints the endpoint URL of the evaluation dashboard."""
         if self._client:
             info_res = self._info_api.info_service_get_info()
             if not info_res or not info_res.info:
@@ -155,11 +177,8 @@ class Dashboard:
 
             host = info_res.info.base_url
             url = urllib.parse.urljoin(host, self.key)
-
-            # NOTE: Local import is used to avoid problems for users outside Jupyter environment.
-            import webbrowser
-
-            webbrowser.open(url)
+            print(f"Open following url to access evaluation dashboard: \n\n{url}")
+            return url
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
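Taken together, the dashboards.py changes poll the dashboard's create operation (via the new OperationServiceApi) instead of the leaderboard list, and show() now prints and returns the dashboard URL rather than opening a browser. A minimal usage sketch under those assumptions; the `dashboard` variable and the way it is obtained are illustrative, only the two method calls are confirmed by this diff:

    # Hypothetical usage; `dashboard` is any Dashboard instance obtained from the client.
    dashboard.wait_to_finish(timeout=600, verbose=True)  # polls the create operation, renders a ProgressBar
    url = dashboard.show()  # prints the dashboard URL and returns it as a string
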
--- eval_studio_client/leaderboards.py
+++ eval_studio_client/leaderboards.py
@@ -39,6 +39,7 @@ class Leaderboard:
     _model_name: Optional[str] = None
     _status: Optional[models.V1LeaderboardStatus] = None
     _client: Optional[api.ApiClient] = None
+    _operation: Optional[str] = None
 
     def __post_init__(self):
         self._evaluator_api = api.EvaluatorServiceApi(self._client)
@@ -198,6 +199,7 @@ class Leaderboard:
             _leaderboard=api_leaderboard.leaderboard_table,
             _status=api_leaderboard.status,
             _client=client,
+            _operation=api_leaderboard.create_operation or None,
         )
 
     @staticmethod
--- eval_studio_client/tests.py
+++ eval_studio_client/tests.py
@@ -11,6 +11,7 @@ from typing import Union
 from eval_studio_client import api
 from eval_studio_client import documents as d7s
 from eval_studio_client import perturbators as p10s
+from eval_studio_client import utils
 from eval_studio_client.api import models
 
 
@@ -85,15 +86,9 @@ class TestCaseGenerator(enum.Enum):
 
 
 @dataclasses.dataclass
-class TestCaseGenerationHandle:
+class _TestCaseGenerationHandle:
 
     name: Any | None
-    create_time: Optional[datetime.datetime] = None
-    creator: Optional[str] = None
-    update_time: Optional[datetime.datetime] = None
-    updater: Optional[str] = None
-    delete_time: Optional[datetime.datetime] = None
-    deleter: Optional[str] = None
     progress: Optional[float] = None
     progress_message: Optional[str] = None
     error: Optional[models.RpcStatus] = None
@@ -102,11 +97,11 @@ class TestCaseGenerationHandle:
     @staticmethod
     def _from_operation(
         res: models.V1GenerateTestCasesResponse | models.V1GetOperationResponse,
-    ) -> "TestCaseGenerationHandle":
+    ) -> "_TestCaseGenerationHandle":
         """Converts an API operation to prompt generation handle."""
         op: models.V1Operation | None = res.operation
         if not op:
-            return TestCaseGenerationHandle(name=None)
+            return _TestCaseGenerationHandle(name=None)
 
         # progress
         if hasattr(op, "metadata") and op.metadata:
@@ -114,14 +109,8 @@ class TestCaseGenerationHandle:
         else:
             meta_dict = {}
 
-        return TestCaseGenerationHandle(
+        return _TestCaseGenerationHandle(
             name=op.name,
-            create_time=op.create_time,
-            creator=op.creator,
-            update_time=op.update_time,
-            updater=op.updater,
-            delete_time=op.delete_time,
-            deleter=op.deleter,
             progress=meta_dict.get("progress"),
             progress_message=meta_dict.get("progressMessage"),
             error=op.error,
@@ -193,6 +182,7 @@ class Test:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
     _client: Optional[api.ApiClient] = None
+    _gen_tc_op_name: Optional[str] = None
 
     def __post_init__(self):
         if self._client:
@@ -272,7 +262,7 @@ class Test:
         base_llm_model: Optional[str] = None,
         generators: Optional[List[TestCaseGenerator]] = None,
         existing_collection: Optional[str] = None,
-    ) -> "TestCaseGenerationHandle":
+    ) -> None:
         """Generates test cases based on the documents of the Test.
 
         Args:
@@ -296,28 +286,28 @@ class Test:
 
         res = self._test_api.test_service_generate_test_cases(self.key, req)
 
-        return TestCaseGenerationHandle._from_operation(res)
+        op: models.V1Operation | None = res.operation
+        self._gen_tc_op_name = op.name if op else None
 
     def wait_for_test_case_generation(
-        self,
-        handle: TestCaseGenerationHandle,
-        timeout: Optional[float] = None,
-        verbose: bool = False,
-    ) -> TestCaseGenerationHandle:
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
         """Waits for the test case generation to finish.
 
         Args:
-            handle (TestCaseGenerationHandle): Handle of the test case generation.
             timeout (float): The maximum time to wait in seconds.
            verbose (bool): If True, prints the status of the handle while waiting.
         """
-        if not handle.name:
-            raise ValueError("Test case generation handle is not valid.")
-        elif handle.done:
-            return handle
+        if not self._gen_tc_op_name:
+            raise ValueError(
+                "There is no ongoing test case generation - the operation name is not "
+                "set."
+            )
 
         if verbose:
-            print(f"Waiting for test case generation to finish ({handle.name}):")
+            print(
+                f"Waiting for test case generation to finish ({self._gen_tc_op_name}):"
+            )
         if self._client:
             # exponential backoff
             wait_time = 1.0
@@ -325,37 +315,30 @@ class Test:
             wait_max = 8.0
             wait_total = 0.0
             timeout = timeout or float(2 * 24 * 60 * 60) # 2 days
-            # progress
-            p_max = 1.0
-            p_msg = ""
+            progress_bar = utils.ProgressBar()
             while wait_total < timeout:
-                handle = TestCaseGenerationHandle._from_operation(
-                    self._operation_api.operation_service_get_operation(handle.name)
+                handle = _TestCaseGenerationHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._gen_tc_op_name
+                    )
                 )
 
                 if verbose:
-                    print(" " * len(p_msg), end="\r")
-                    if handle.progress or handle.progress_message:
-                        try:
-                            h_progress = float(str(handle.progress))
-                        except ValueError:
-                            h_progress = 0.0
-                        h_msg = handle.progress_message or "Processing"
-                    else:
-                        h_progress = 0.0
-                        h_msg = "Initializing"
-                    p_progress = int(h_progress / p_max * 100)
-                    p_hashes = p_progress // 5
-                    p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {h_msg}"
-                    print(p_msg, end="\r")
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
 
                 if handle.done:
-                    return handle
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Test case generation failed: {handle.error}"
+                        )
+                    return
 
                 wait_time *= wait_coef
                 time.sleep(min(wait_time, wait_max))
         else:
-            raise ValueError("Cannot establish connection to Eval Studio host.")
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
 
         raise TimeoutError("Waiting timeout has been reached.")
 
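The tests.py changes drop the public TestCaseGenerationHandle: generate_test_cases() now stores the operation name on the Test itself and returns None, and wait_for_test_case_generation() polls that stored name, raising RuntimeError when the operation finished with an error. A rough before/after sketch; the `test` variable and the elided arguments are illustrative, not part of this diff:

    # Before (1.0.0a1): the caller had to keep the returned handle.
    #   handle = test.generate_test_cases(...)
    #   handle = test.wait_for_test_case_generation(handle, verbose=True)

    # After (1.0.2): the operation name is tracked internally on the Test.
    test.generate_test_cases(...)  # arguments elided; now returns None
    test.wait_for_test_case_generation(verbose=True)  # raises RuntimeError if generation failed
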
--- /dev/null
+++ eval_studio_client/utils.py
@@ -0,0 +1,26 @@
+from typing import Optional
+
+
+class ProgressBar:
+    def __init__(self):
+        self.progress = 0.0
+        self.progress_message = "Initializing"
+        self._progress_max = 1.0
+
+    def update(self, progress: float, message: Optional[str] = None):
+        try:
+            self.progress = float(str(progress))
+        except ValueError:
+            self.progress = 0.0
+
+        if message:
+            self.progress_message = message or ""
+
+        self.print()
+
+    def print(self):
+        print(" " * len(self.progress_message), end="\r")
+        p_progress = int(self.progress / self._progress_max * 100)
+        p_hashes = p_progress // 5
+        p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {self.progress_message}"
+        print(p_msg, end="\r")
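The new eval_studio_client/utils.py provides the ProgressBar shared by both waiting loops above: update() coerces the value to a float (falling back to 0.0), remembers the message, and print() redraws a single carriage-return line scaled against _progress_max (1.0). A short standalone sketch of how it behaves; the loop itself is illustrative, not part of the package:

    import time

    from eval_studio_client import utils

    bar = utils.ProgressBar()
    for step in range(11):
        # progress is expected on a 0.0-1.0 scale
        bar.update(step / 10, f"step {step} of 10")
        time.sleep(0.2)
    print()  # move past the carriage-return line once finished
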
--- eval_studio_client-1.0.0a1.dist-info/METADATA
+++ eval_studio_client-1.0.2.dist-info/METADATA
@@ -1,10 +1,10 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: eval-studio-client
-Version: 1.0.0a1
+Version: 1.0.2
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
-License: MIT
+License-Expression: MIT
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.9
--- eval_studio_client-1.0.0a1.dist-info/RECORD
+++ eval_studio_client-1.0.2.dist-info/RECORD
@@ -1,16 +1,17 @@
 eval_studio_client/__about__.py,sha256=7TnXVu0lNAY4UdQ_2iwTlAENGdigMUVBy6UmtWGB6sQ,30
 eval_studio_client/__init__.py,sha256=v8lXY_l4j3lAbIfW21nZFeWZX0sl4nKHbB29h2qYVU8,207
 eval_studio_client/client.py,sha256=khRFtcFNZHAMe1bA7SyvoLOPHVZQ2XJOZ3UB3gX8EKs,3307
-eval_studio_client/dashboards.py,sha256=S35kude0FSn-v0t-H1N6aHhsNhlmIgF3duKR8TUfKes,7331
+eval_studio_client/dashboards.py,sha256=TBMiO4OvTnWYSVuj2-EBxSdKQtEAb_HXgc9gXtRnu-s,8381
 eval_studio_client/documents.py,sha256=fjsbHnqZnouu0stCf_p15RgoszkY4_gIsbX1hiw7Xv8,3076
 eval_studio_client/evaluators.py,sha256=blJlWMswIGr1u6TQDiiO-fInYVnkBT0Y02J57o8Z094,2100
 eval_studio_client/insights.py,sha256=bhe6XBVJ61-2bcDdNe6HiZsu0sly8LeoYAKo1GkgK08,1199
-eval_studio_client/leaderboards.py,sha256=5S4cJVS8bX_KoRcT_75eXxrDY-xdfkQdehwGgIgIBfU,7933
+eval_studio_client/leaderboards.py,sha256=NHko_kuPIXnbBdEDMK1MHQmHJRCHA7_Q1wx4eqBvBF8,8035
 eval_studio_client/models.py,sha256=nW1Wk6L89iWSjhMVk_sKmxSomKX3b6ANALbwWvbJ7Uk,21346
 eval_studio_client/perturbators.py,sha256=CtcWqEgPGpOcDHvYAQBlNDKnS-ZDBkL7Y_Ygsgpvikw,3133
 eval_studio_client/problems.py,sha256=rdGIfo7AqyxGhWMpbIDX1WXFoQvzKktKAWDKRde5VbY,1515
 eval_studio_client/test_labs.py,sha256=IEY98Ocu7WQcxZN_jy5YthVBoHAgHjgA2T93U7q0eYE,11260
-eval_studio_client/tests.py,sha256=xMKI3OC-dRHlss484gkuLWcF-XFuLZxx7-XMIuNmAxU,23236
+eval_studio_client/tests.py,sha256=_Qu6X4FoocYJ-liClXLQqIR91P7GjWmxpeyDhRl5JXI,22393
+eval_studio_client/utils.py,sha256=e5bsQVgNHYNSqSOthxlmncerPdgbvWwQaY_C-libuXk,764
 eval_studio_client/api/__init__.py,sha256=Ef5qooH4SLfYUqVBJl79oRKWYnXryDPZV4IXGfvG1Wc,15269
 eval_studio_client/api/api_client.py,sha256=yFQKmCsVhswcTbdGY4lf-61mf8FVm3Kfon8Qhe1sPKw,26431
 eval_studio_client/api/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
@@ -480,6 +481,6 @@ eval_studio_client/api/test/test_v1_update_test_response.py,sha256=pqTwL9SgoOM9k
 eval_studio_client/api/test/test_v1_who_am_i_response.py,sha256=bNbjL5-b-4asyziW6znJhuU2yrzd9RgJa2ZiNw3e6YA,1523
 eval_studio_client/api/test/test_who_am_i_service_api.py,sha256=gYWKFamJMyVne2QaOSPz6WEkxExRuAphMGKf1nFayLU,898
 eval_studio_client/gen/openapiv2/eval_studio.swagger.json,sha256=2jOBBxQ2H2mS9C_nlqoTrTiYMmCLaUFQym6su3fXJ8I,210976
-eval_studio_client-1.0.0a1.dist-info/METADATA,sha256=rX1UrncVa_ayrO30V9oeNhTjqV1EWNyBFOvL2q8YJ9c,709
-eval_studio_client-1.0.0a1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
-eval_studio_client-1.0.0a1.dist-info/RECORD,,
+eval_studio_client-1.0.2.dist-info/METADATA,sha256=khQkNMvPEvKdkWqjP5c71z-SNmj6ey3cJipV46pq_aE,718
+eval_studio_client-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+eval_studio_client-1.0.2.dist-info/RECORD,,
--- eval_studio_client-1.0.0a1.dist-info/WHEEL
+++ eval_studio_client-1.0.2.dist-info/WHEEL
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.26.3
+Generator: hatchling 1.27.0
 Root-Is-Purelib: true
 Tag: py3-none-any