eval-studio-client 1.0.0a1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- eval_studio_client/dashboards.py +37 -18
- eval_studio_client/leaderboards.py +2 -0
- eval_studio_client/tests.py +33 -50
- eval_studio_client/utils.py +26 -0
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/METADATA +3 -3
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/RECORD +7 -6
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/WHEEL +1 -1
eval_studio_client/dashboards.py
CHANGED
@@ -9,6 +9,7 @@ from eval_studio_client import api
 from eval_studio_client import insights as i6s
 from eval_studio_client import leaderboards as l10s
 from eval_studio_client import problems as p6s
+from eval_studio_client import utils
 from eval_studio_client.api import models
 
 
@@ -41,6 +42,7 @@ class Dashboard:
         self._dashboard_api = api.DashboardServiceApi(self._client)
         self._leaderboard_api = api.LeaderboardServiceApi(self._client)
         self._info_api = api.InfoServiceApi(self._client)
+        self._operation_api = api.OperationServiceApi(self._client)
 
     @property
     def leaderboards(self) -> Optional[List[l10s.Leaderboard]]:
@@ -118,36 +120,56 @@
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
-    def wait_to_finish(self, timeout: Optional[float] = None):
+    def wait_to_finish(self, timeout: Optional[float] = None, verbose: bool = False):
         """Waits for the dashboard to finish.
 
         Args:
             timeout: The maximum time to wait in seconds.
+            verbose (bool): If True, prints the status of the evaluation while waiting.
         """
         timeout = timeout or float("inf")
+        progress_bar = utils.ProgressBar()
         if self.finished:
             return
 
+        if not self._create_operation:
+            # This means that the evaluation has no assigned operation, thus cannot poll.
+            raise RuntimeError("Failed to retrieve running evaluation info.")
+
         if self._client:
             ctr = 0
             while ctr < timeout:
-
-
-
-
-
-
-
+                op = self._operation_api.operation_service_get_operation(
+                    self._create_operation
+                )
+                if not op or not op.operation:
+                    raise RuntimeError(
+                        "Failed to retrieve running evaluation progress."
+                    )
+
+                if verbose:
+                    if not op.operation.metadata:
+                        raise RuntimeError(
+                            "Failed to retrieve running evaluation progress details."
+                        )
+
+                    op_meta = op.operation.metadata.to_dict()
+                    progress = op_meta.get("progress", 0)
+                    progress_msg = op_meta.get("progressMessage", "Running")
+                    progress_bar.update(progress, progress_msg)
+
+                if op.operation.done:
+                    return
+
+                ctr += 1
+                time.sleep(1)
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
         raise TimeoutError("Waiting timeout has been reached.")
 
-    def show(self):
-        """
-
-        NOTE: This functionality is primarily for interactive use in Jupyter notebooks.
-        """
+    def show(self) -> str:
+        """Prints the endpoint URL of the evaluation dashboard."""
         if self._client:
             info_res = self._info_api.info_service_get_info()
             if not info_res or not info_res.info:
@@ -155,11 +177,8 @@
 
             host = info_res.info.base_url
             url = urllib.parse.urljoin(host, self.key)
-
-
-            import webbrowser
-
-            webbrowser.open(url)
+            print(f"Open following url to access evaluation dashboard: \n\n{url}")
+            return url
         else:
             raise ValueError("Cannot establish connection to Eval Studio host.")
 
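Usage note: taken together, the dashboards.py changes replace the old webbrowser-based show() with one that prints and returns the dashboard URL, and teach wait_to_finish() to poll the create operation, optionally drawing a textual progress bar. A minimal sketch of the new surface (the `dashboard` variable is hypothetical; constructing a Dashboard is outside this diff):

    # Sketch only: assumes `dashboard` is a Dashboard backed by a connected api.ApiClient.
    dashboard.wait_to_finish(timeout=600, verbose=True)  # polls roughly once per second, drawing a ProgressBar
    url = dashboard.show()  # prints the dashboard URL and returns it; no browser is opened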
eval_studio_client/leaderboards.py
CHANGED
@@ -39,6 +39,7 @@ class Leaderboard:
     _model_name: Optional[str] = None
     _status: Optional[models.V1LeaderboardStatus] = None
     _client: Optional[api.ApiClient] = None
+    _operation: Optional[str] = None
 
     def __post_init__(self):
         self._evaluator_api = api.EvaluatorServiceApi(self._client)
@@ -198,6 +199,7 @@ class Leaderboard:
             _leaderboard=api_leaderboard.leaderboard_table,
             _status=api_leaderboard.status,
             _client=client,
+            _operation=api_leaderboard.create_operation or None,
         )
 
     @staticmethod
eval_studio_client/tests.py
CHANGED
@@ -11,6 +11,7 @@ from typing import Union
 from eval_studio_client import api
 from eval_studio_client import documents as d7s
 from eval_studio_client import perturbators as p10s
+from eval_studio_client import utils
 from eval_studio_client.api import models
 
 
@@ -85,15 +86,9 @@ class TestCaseGenerator(enum.Enum):
 
 
 @dataclasses.dataclass
-class TestCaseGenerationHandle:
+class _TestCaseGenerationHandle:
 
     name: Any | None
-    create_time: Optional[datetime.datetime] = None
-    creator: Optional[str] = None
-    update_time: Optional[datetime.datetime] = None
-    updater: Optional[str] = None
-    delete_time: Optional[datetime.datetime] = None
-    deleter: Optional[str] = None
     progress: Optional[float] = None
     progress_message: Optional[str] = None
     error: Optional[models.RpcStatus] = None
@@ -102,11 +97,11 @@ class TestCaseGenerationHandle:
     @staticmethod
     def _from_operation(
         res: models.V1GenerateTestCasesResponse | models.V1GetOperationResponse,
-    ) -> "TestCaseGenerationHandle":
+    ) -> "_TestCaseGenerationHandle":
         """Converts an API operation to prompt generation handle."""
        op: models.V1Operation | None = res.operation
         if not op:
-            return
+            return _TestCaseGenerationHandle(name=None)
 
         # progress
         if hasattr(op, "metadata") and op.metadata:
@@ -114,14 +109,8 @@ class TestCaseGenerationHandle:
         else:
             meta_dict = {}
 
-        return TestCaseGenerationHandle(
+        return _TestCaseGenerationHandle(
             name=op.name,
-            create_time=op.create_time,
-            creator=op.creator,
-            update_time=op.update_time,
-            updater=op.updater,
-            delete_time=op.delete_time,
-            deleter=op.deleter,
             progress=meta_dict.get("progress"),
             progress_message=meta_dict.get("progressMessage"),
             error=op.error,
@@ -193,6 +182,7 @@ class Test:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
     _client: Optional[api.ApiClient] = None
+    _gen_tc_op_name: Optional[str] = None
 
     def __post_init__(self):
         if self._client:
@@ -272,7 +262,7 @@
         base_llm_model: Optional[str] = None,
         generators: Optional[List[TestCaseGenerator]] = None,
         existing_collection: Optional[str] = None,
-    ) -> TestCaseGenerationHandle:
+    ) -> None:
         """Generates test cases based on the documents of the Test.
 
         Args:
@@ -296,28 +286,28 @@
 
         res = self._test_api.test_service_generate_test_cases(self.key, req)
 
-
+        op: models.V1Operation | None = res.operation
+        self._gen_tc_op_name = op.name if op else None
 
     def wait_for_test_case_generation(
-        self,
-        handle: TestCaseGenerationHandle,
-        timeout: Optional[float] = None,
-        verbose: bool = False,
-    ) -> TestCaseGenerationHandle:
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
         """Waits for the test case generation to finish.
 
         Args:
-            handle (TestCaseGenerationHandle): Handle of the test case generation.
             timeout (float): The maximum time to wait in seconds.
             verbose (bool): If True, prints the status of the handle while waiting.
         """
-        if not
-            raise ValueError(
-
-
+        if not self._gen_tc_op_name:
+            raise ValueError(
+                "There is no ongoing test case generation - the operation name is not "
+                "set."
+            )
 
         if verbose:
-            print(
+            print(
+                f"Waiting for test case generation to finish ({self._gen_tc_op_name}):"
+            )
         if self._client:
             # exponential backoff
             wait_time = 1.0
@@ -325,37 +315,30 @@
             wait_max = 8.0
             wait_total = 0.0
             timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
-
-            p_max = 1.0
-            p_msg = ""
+            progress_bar = utils.ProgressBar()
             while wait_total < timeout:
-                handle = TestCaseGenerationHandle._from_operation(
-                    self._operation_api.operation_service_get_operation(
+                handle = _TestCaseGenerationHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._gen_tc_op_name
+                    )
                 )
 
                 if verbose:
-
-                    if handle.progress or handle.progress_message:
-                        try:
-                            h_progress = float(str(handle.progress))
-                        except ValueError:
-                            h_progress = 0.0
-                        h_msg = handle.progress_message or "Processing"
-                    else:
-                        h_progress = 0.0
-                        h_msg = "Initializing"
-                    p_progress = int(h_progress / p_max * 100)
-                    p_hashes = p_progress // 5
-                    p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {h_msg}"
-                    print(p_msg, end="\r")
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
 
                 if handle.done:
-
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Test case generation failed: {handle.error}"
+                        )
+                    return
 
                 wait_time *= wait_coef
                 time.sleep(min(wait_time, wait_max))
         else:
-            raise ValueError(
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
 
         raise TimeoutError("Waiting timeout has been reached.")
 
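Usage note: the tests.py changes retire the public TestCaseGenerationHandle. generate_test_cases() now returns None and records the operation name in Test._gen_tc_op_name, which wait_for_test_case_generation() polls with exponential backoff, raising RuntimeError if the finished operation carries an error. A minimal sketch (the `test` variable and the model name are hypothetical; obtaining a Test is outside this diff):

    # Sketch only: assumes `test` is a Test backed by a connected api.ApiClient.
    test.generate_test_cases(base_llm_model="my-llm")  # hypothetical model name; now returns None
    test.wait_for_test_case_generation(timeout=3600, verbose=True)
    # raises ValueError if no generation was started, TimeoutError if the timeout elapses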
eval_studio_client/utils.py
ADDED
@@ -0,0 +1,26 @@
+from typing import Optional
+
+
+class ProgressBar:
+    def __init__(self):
+        self.progress = 0.0
+        self.progress_message = "Initializing"
+        self._progress_max = 1.0
+
+    def update(self, progress: float, message: Optional[str] = None):
+        try:
+            self.progress = float(str(progress))
+        except ValueError:
+            self.progress = 0.0
+
+        if message:
+            self.progress_message = message or ""
+
+        self.print()
+
+    def print(self):
+        print(" " * len(self.progress_message), end="\r")
+        p_progress = int(self.progress / self._progress_max * 100)
+        p_hashes = p_progress // 5
+        p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {self.progress_message}"
+        print(p_msg, end="\r")
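Usage note: ProgressBar is fully shown above, so its behavior follows directly from the code: progress is normalized against _progress_max (1.0), and each update() redraws the bar in place via a carriage return. For example:

    from eval_studio_client import utils

    bar = utils.ProgressBar()
    bar.update(0.25, "Evaluating")  # draws "  25% |#####               | Evaluating"
    bar.update(0.5)                 # keeps the previous message, bar advances to 50%
    bar.update(1.0, "Done")         # draws " 100% |####################| Done"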
{eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/METADATA
CHANGED
@@ -1,10 +1,10 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: eval-studio-client
-Version: 1.0.0a1
+Version: 1.0.2
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
-License: MIT
+License-Expression: MIT
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.9
{eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.2.dist-info}/RECORD
CHANGED
@@ -1,16 +1,17 @@
 eval_studio_client/__about__.py,sha256=7TnXVu0lNAY4UdQ_2iwTlAENGdigMUVBy6UmtWGB6sQ,30
 eval_studio_client/__init__.py,sha256=v8lXY_l4j3lAbIfW21nZFeWZX0sl4nKHbB29h2qYVU8,207
 eval_studio_client/client.py,sha256=khRFtcFNZHAMe1bA7SyvoLOPHVZQ2XJOZ3UB3gX8EKs,3307
-eval_studio_client/dashboards.py,sha256=
+eval_studio_client/dashboards.py,sha256=TBMiO4OvTnWYSVuj2-EBxSdKQtEAb_HXgc9gXtRnu-s,8381
 eval_studio_client/documents.py,sha256=fjsbHnqZnouu0stCf_p15RgoszkY4_gIsbX1hiw7Xv8,3076
 eval_studio_client/evaluators.py,sha256=blJlWMswIGr1u6TQDiiO-fInYVnkBT0Y02J57o8Z094,2100
 eval_studio_client/insights.py,sha256=bhe6XBVJ61-2bcDdNe6HiZsu0sly8LeoYAKo1GkgK08,1199
-eval_studio_client/leaderboards.py,sha256=
+eval_studio_client/leaderboards.py,sha256=NHko_kuPIXnbBdEDMK1MHQmHJRCHA7_Q1wx4eqBvBF8,8035
 eval_studio_client/models.py,sha256=nW1Wk6L89iWSjhMVk_sKmxSomKX3b6ANALbwWvbJ7Uk,21346
 eval_studio_client/perturbators.py,sha256=CtcWqEgPGpOcDHvYAQBlNDKnS-ZDBkL7Y_Ygsgpvikw,3133
 eval_studio_client/problems.py,sha256=rdGIfo7AqyxGhWMpbIDX1WXFoQvzKktKAWDKRde5VbY,1515
 eval_studio_client/test_labs.py,sha256=IEY98Ocu7WQcxZN_jy5YthVBoHAgHjgA2T93U7q0eYE,11260
-eval_studio_client/tests.py,sha256=
+eval_studio_client/tests.py,sha256=_Qu6X4FoocYJ-liClXLQqIR91P7GjWmxpeyDhRl5JXI,22393
+eval_studio_client/utils.py,sha256=e5bsQVgNHYNSqSOthxlmncerPdgbvWwQaY_C-libuXk,764
 eval_studio_client/api/__init__.py,sha256=Ef5qooH4SLfYUqVBJl79oRKWYnXryDPZV4IXGfvG1Wc,15269
 eval_studio_client/api/api_client.py,sha256=yFQKmCsVhswcTbdGY4lf-61mf8FVm3Kfon8Qhe1sPKw,26431
 eval_studio_client/api/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
@@ -480,6 +481,6 @@ eval_studio_client/api/test/test_v1_update_test_response.py,sha256=pqTwL9SgoOM9k
 eval_studio_client/api/test/test_v1_who_am_i_response.py,sha256=bNbjL5-b-4asyziW6znJhuU2yrzd9RgJa2ZiNw3e6YA,1523
 eval_studio_client/api/test/test_who_am_i_service_api.py,sha256=gYWKFamJMyVne2QaOSPz6WEkxExRuAphMGKf1nFayLU,898
 eval_studio_client/gen/openapiv2/eval_studio.swagger.json,sha256=2jOBBxQ2H2mS9C_nlqoTrTiYMmCLaUFQym6su3fXJ8I,210976
-eval_studio_client-1.0.0a1.dist-info/METADATA,sha256=
-eval_studio_client-1.0.0a1.dist-info/WHEEL,sha256=
-eval_studio_client-1.0.0a1.dist-info/RECORD,,
+eval_studio_client-1.0.2.dist-info/METADATA,sha256=khQkNMvPEvKdkWqjP5c71z-SNmj6ey3cJipV46pq_aE,718
+eval_studio_client-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+eval_studio_client-1.0.2.dist-info/RECORD,,