eval-studio-client 0.8.2__py3-none-any.whl → 1.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eval_studio_client/leaderboards.py

@@ -33,6 +33,7 @@ class Leaderboard:
     update_time: Optional[datetime.datetime] = None
     problems: List[p6s.Problem] = dataclasses.field(default_factory=list)
     insights: List[i6s.Insight] = dataclasses.field(default_factory=list)
+    existing_collection: Optional[str] = None
     _report: Optional[str] = None
     _leaderboard: Optional[str] = None
     _model_name: Optional[str] = None
@@ -161,6 +162,7 @@ class Leaderboard:
             evaluator=self._evaluator_name,
             tests=self._test_names,
             model=self._model_name,
+            h2ogpte_collection=self.existing_collection or None,
         )

     def _update_result(self, api_leaderboard: models.V1Leaderboard):
@@ -189,6 +191,7 @@ class Leaderboard:
             update_time=api_leaderboard.update_time,
             problems=problems,
             insights=insights,
+            existing_collection=api_leaderboard.h2ogpte_collection or None,
             _evaluator_name=api_leaderboard.evaluator or "",
             _test_names=api_leaderboard.tests or [],
             _report=api_leaderboard.leaderboard_report or "",
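
Taken together, the three leaderboards.py hunks thread the new field through a full round trip: existing_collection is declared on the dataclass, written to the API model's h2ogpte_collection field when the evaluation request is built, and read back from api_leaderboard.h2ogpte_collection when results are refreshed. A minimal sketch of that mapping pattern, using stand-in types; only the two field names and the "or None" normalization come from the hunks above, everything else is illustrative:

import dataclasses
from typing import Optional


@dataclasses.dataclass
class V1LeaderboardStub:
    """Stand-in for the generated models.V1Leaderboard (illustrative)."""
    h2ogpte_collection: Optional[str] = None


@dataclasses.dataclass
class LeaderboardStub:
    existing_collection: Optional[str] = None

    def to_api(self) -> V1LeaderboardStub:
        # "or None" normalizes an empty string to an unset API field.
        return V1LeaderboardStub(h2ogpte_collection=self.existing_collection or None)

    @staticmethod
    def from_api(api: V1LeaderboardStub) -> "LeaderboardStub":
        return LeaderboardStub(existing_collection=api.h2ogpte_collection or None)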
eval_studio_client/models.py

@@ -22,6 +22,28 @@ DEFAULT_RAG_MODEL_KEY = "models/defaultRAGModel"
 DEFAULT_LLM_MODEL_KEY = "models/defaultLLMModel"


+@dataclasses.dataclass
+class CollectionInfo:
+    """Represents information about a collection in H2OGPTe
+    or a Knowledge Base in Amazon Bedrock.
+    """
+
+    key: str
+    name: str
+    description: str
+
+    def __str__(self):
+        return f"{self.name} ({self.key})"
+
+    @staticmethod
+    def _from_api_collection_info(api_col: models.V1CollectionInfo) -> "CollectionInfo":
+        return CollectionInfo(
+            key=api_col.id or "",
+            name=api_col.display_name or "",
+            description=api_col.description or "",
+        )
+
+
 @dataclasses.dataclass
 class Model:
     """Represents Eval Studio connection to an external RAG/LLM system.
@@ -80,6 +102,28 @@ class Model:

         return result

+    @property
+    def base_models(self) -> List[str]:
+        """List of base LLM models available to use, e.g., for the evaluation."""
+        res = self._model_api.model_service_list_base_models(self.key)
+        if res and res.base_models:
+            return [str(m) for m in res.base_models]
+
+        raise RuntimeError("Failed to list base models")
+
+    @property
+    def collections(self) -> List[CollectionInfo]:
+        """List of collections available for evaluation.
+
+        NOTE: Currently supported only for the H2OGPTe and Amazon Bedrock
+        RAG model hosts.
+        """
+        res = self._model_api.model_service_list_model_collections(self.key)
+        if res and res.collections:
+            return list(res.collections)
+
+        raise RuntimeError("Failed to list model host collections")
+
     def create_leaderboard(
         self,
         name: str,
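
The two new read-only properties give callers a discovery step before launching an evaluation: base_models lists the LLMs the host exposes, and collections lists the corpora it can evaluate against. A hedged usage sketch; "model" is assumed to be an already-connected Model instance (client bootstrap is not part of this diff) and the helper name is illustrative:

def pick_collection_key(model, display_name: str) -> str:
    # The collections property raises RuntimeError on a failed or empty
    # listing, so a plain iteration is safe here.
    cols = model.collections
    for col in cols:
        if col.name == display_name:
            return col.key
    raise LookupError(
        f"no collection named {display_name!r}; available: {[str(c) for c in cols]}"
    )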
@@ -88,20 +132,27 @@ class Model:
         description: Optional[str] = None,
         base_models: Optional[List[str]] = None,
         use_cache: bool = True,
+        existing_collection: Optional[str] = None,
     ) -> Optional[l10s.Leaderboard]:
         """Runs a new evaluation for the model and creates a new leaderboard.

         Args:
+            name: The name of the leaderboard.
             evaluator: The evaluator to use for the evaluation.
             test_suite: The list of tests used to evaluate the model.
+            description (optional): The description of the leaderboard.
             base_models (optional): The base LLM models to use for the evaluation.
             use_cache (optional): Whether to use the cached answers if available.
+            existing_collection (optional): ID or resource name of an existing
+                collection to use as the corpus for the evaluation.
+                NOTE: Currently supported only for the H2OGPTe and Amazon Bedrock model hosts.
         """
         lb = l10s.Leaderboard(
             key="",
             name=name,
             description=description or "",
             base_models=base_models or [],
+            existing_collection=existing_collection,
             _model_name=self.key,
             _evaluator_name=evaluator.key,
             _test_names=[t.key for t in test_suite],
@@ -128,6 +179,7 @@ class Model:
         test_suites: Union[tests.Test, List[tests.Test]],
         description: Optional[str] = None,
         base_models: Optional[List[str]] = None,
+        existing_collection: Optional[str] = None,
     ) -> Optional[d8s.Dashboard]:
         """Runs a new evaluation for the model and creates a new dashboard.

@@ -136,6 +188,9 @@ class Model:
             test_suites: The test(s) used to evaluate the model.
             description (optional): The description of the dashboard.
             base_models (optional): The base LLM models to use for the evaluation.
+            existing_collection (optional): ID or resource name of an existing
+                collection to use as the corpus for the evaluation.
+                NOTE: Currently supported only for the H2OGPTe and Amazon Bedrock model hosts.
         """
         _evaluators = (
             [evaluators] if isinstance(evaluators, e8s.Evaluator) else evaluators
@@ -151,6 +206,7 @@ class Model:
                 name=f"{name} - {evaluator.name}",
                 description=description or "",
                 base_models=base_models or [],
+                existing_collection=existing_collection,
                 _model_name=self.key,
                 _evaluator_name=evaluator.key,
                 _test_names=[t.key for t in _test_suites],
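
Both create_leaderboard and create_dashboard now accept the same existing_collection pass-through, so an evaluation can target a corpus that already lives on the model host instead of uploading documents. A hedged sketch; "model", "evaluator", and "test_suite" are assumed to come from an authenticated eval_studio_client session (not shown in this diff):

def evaluate_existing_corpus(model, evaluator, test_suite, collection_key: str):
    # The key may be a raw ID or a full resource name; it is forwarded
    # verbatim as h2ogpte_collection on the API leaderboard.
    return model.create_leaderboard(
        name="RAG eval on existing corpus",
        evaluator=evaluator,
        test_suite=test_suite,
        existing_collection=collection_key,
    )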
eval_studio_client/tests.py

@@ -271,6 +271,7 @@ class Test:
         model: Optional[str] = None,
         base_llm_model: Optional[str] = None,
         generators: Optional[List[TestCaseGenerator]] = None,
+        existing_collection: Optional[str] = None,
     ) -> "TestCaseGenerationHandle":
         """Generates test cases based on the documents of the Test.

@@ -280,6 +281,9 @@ class Test:
             model (str): Model to use for generating the prompts.
             base_llm_model (str): Base LLM model to use for generating the prompts.
             generators (List[TestCaseGenerator]): Methods to use for generation.
+            existing_collection (optional): ID or resource name of an existing
+                collection from which the prompts will be generated.
+                NOTE: Currently supported only for the H2OGPTe model host.
         """

         req = models.TestServiceGenerateTestCasesRequest(
@@ -287,6 +291,7 @@ class Test:
             model=model or None,
             base_llm_model=base_llm_model or None,
             generators=[g.to_api_proto() for g in generators] if generators else None,
+            h2ogpte_collection_id=existing_collection or None,
        )

        res = self._test_api.test_service_generate_test_cases(self.key, req)
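
On the test side the same idea applies to prompt generation: the new parameter is forwarded as h2ogpte_collection_id, so test cases can be generated from documents already ingested into a collection. A hedged sketch; the public method name is not visible in these hunks, so generate_test_cases is an assumption, as is the "test" object coming from an authenticated session:

def generate_from_collection(test, collection_id: str):
    # Method name assumed from TestServiceGenerateTestCasesRequest; returns
    # a TestCaseGenerationHandle that can be polled (see the wait loop below).
    # Per the docstring above, only H2OGPTe hosts honor the collection ID.
    return test.generate_test_cases(existing_collection=collection_id)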
@@ -317,23 +322,32 @@ class Test:
         # exponential backoff
         wait_time = 1.0
         wait_coef = 1.6
-        wait_max = 20.0
+        wait_max = 8.0
         wait_total = 0.0
         timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
+        # progress
+        p_max = 1.0
+        p_msg = ""
         while wait_total < timeout:
             handle = TestCaseGenerationHandle._from_operation(
                 self._operation_api.operation_service_get_operation(handle.name)
             )

             if verbose:
+                print(" " * len(p_msg), end="\r")
                 if handle.progress or handle.progress_message:
-                    progress = (
-                        int(handle.progress * 100.0) if handle.progress else 0
-                    )
-                    msg = f"{progress:>2}% - '{handle.progress_message}'"
+                    try:
+                        h_progress = float(str(handle.progress))
+                    except ValueError:
+                        h_progress = 0.0
+                    h_msg = handle.progress_message or "Processing"
                 else:
-                    msg = " 0% - 'Initializing'"
-                    print(f" {msg}")
+                    h_progress = 0.0
+                    h_msg = "Initializing"
+                p_progress = int(h_progress / p_max * 100)
+                p_hashes = p_progress // 5
+                p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {h_msg}"
+                print(p_msg, end="\r")

             if handle.done:
                 return handle
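
The verbose wait loop also changes behavior: polling now backs off to at most 8 seconds instead of 20, handle.progress is parsed defensively via float(str(...)) (so a None or malformed value falls back to 0.0), and instead of printing one line per poll the loop blanks out the previous message and overwrites a single status line with a 20-character bar, one '#' per 5%. A runnable sketch of just the rendering logic, independent of the client:

import time

def render(progress: float, msg: str, p_max: float = 1.0) -> str:
    p_progress = int(progress / p_max * 100)
    p_hashes = p_progress // 5          # 20-char bar, so one '#' per 5%
    return f" {p_progress:>3}% |{'#' * p_hashes:<20}| {msg}"

prev = ""
for step in range(11):
    print(" " * len(prev), end="\r")    # blank out the previous status line
    prev = render(step / 10, "Generating test cases")
    print(prev, end="\r")               # carriage return keeps one line in place
    time.sleep(0.2)
print()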
eval_studio_client-1.0.0a1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: eval-studio-client
-Version: 0.8.2
+Version: 1.0.0a1
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
eval_studio_client-1.0.0a1.dist-info/RECORD

@@ -5,12 +5,12 @@ eval_studio_client/dashboards.py,sha256=S35kude0FSn-v0t-H1N6aHhsNhlmIgF3duKR8TUf
 eval_studio_client/documents.py,sha256=fjsbHnqZnouu0stCf_p15RgoszkY4_gIsbX1hiw7Xv8,3076
 eval_studio_client/evaluators.py,sha256=blJlWMswIGr1u6TQDiiO-fInYVnkBT0Y02J57o8Z094,2100
 eval_studio_client/insights.py,sha256=bhe6XBVJ61-2bcDdNe6HiZsu0sly8LeoYAKo1GkgK08,1199
-eval_studio_client/leaderboards.py,sha256=UZItYErAGRXDsae61iMnHXXjoAUFSPL-HTQ_eQnkIJI,7746
-eval_studio_client/models.py,sha256=4OFASuJF1OvIdVODqUk4Uv70cojIJ9CFz3U1nmPFJwI,19137
+eval_studio_client/leaderboards.py,sha256=5S4cJVS8bX_KoRcT_75eXxrDY-xdfkQdehwGgIgIBfU,7933
+eval_studio_client/models.py,sha256=nW1Wk6L89iWSjhMVk_sKmxSomKX3b6ANALbwWvbJ7Uk,21346
 eval_studio_client/perturbators.py,sha256=CtcWqEgPGpOcDHvYAQBlNDKnS-ZDBkL7Y_Ygsgpvikw,3133
 eval_studio_client/problems.py,sha256=rdGIfo7AqyxGhWMpbIDX1WXFoQvzKktKAWDKRde5VbY,1515
 eval_studio_client/test_labs.py,sha256=IEY98Ocu7WQcxZN_jy5YthVBoHAgHjgA2T93U7q0eYE,11260
-eval_studio_client/tests.py,sha256=n14-zM2J9oUKgKZQm2xjtg7f8MWxnL2Ov00jQqMP8fw,22512
+eval_studio_client/tests.py,sha256=xMKI3OC-dRHlss484gkuLWcF-XFuLZxx7-XMIuNmAxU,23236
 eval_studio_client/api/__init__.py,sha256=Ef5qooH4SLfYUqVBJl79oRKWYnXryDPZV4IXGfvG1Wc,15269
 eval_studio_client/api/api_client.py,sha256=yFQKmCsVhswcTbdGY4lf-61mf8FVm3Kfon8Qhe1sPKw,26431
 eval_studio_client/api/api_response.py,sha256=eMxw1mpmJcoGZ3gs9z6jM4oYoZ10Gjk333s9sKxGv7s,652
@@ -480,6 +480,6 @@ eval_studio_client/api/test/test_v1_update_test_response.py,sha256=pqTwL9SgoOM9k
 eval_studio_client/api/test/test_v1_who_am_i_response.py,sha256=bNbjL5-b-4asyziW6znJhuU2yrzd9RgJa2ZiNw3e6YA,1523
 eval_studio_client/api/test/test_who_am_i_service_api.py,sha256=gYWKFamJMyVne2QaOSPz6WEkxExRuAphMGKf1nFayLU,898
 eval_studio_client/gen/openapiv2/eval_studio.swagger.json,sha256=2jOBBxQ2H2mS9C_nlqoTrTiYMmCLaUFQym6su3fXJ8I,210976
-eval_studio_client-0.8.2.dist-info/METADATA,sha256=hza1__A-Rky7RO8E8KyQmkb-KXvODW1wNLZPAWCJWBk,707
-eval_studio_client-0.8.2.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
-eval_studio_client-0.8.2.dist-info/RECORD,,
+eval_studio_client-1.0.0a1.dist-info/METADATA,sha256=rX1UrncVa_ayrO30V9oeNhTjqV1EWNyBFOvL2q8YJ9c,709
+eval_studio_client-1.0.0a1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+eval_studio_client-1.0.0a1.dist-info/RECORD,,