eval-studio-client 1.0.1__py3-none-any.whl → 1.0.3a1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (179)
  1. eval_studio_client/api/__init__.py +0 -43
  2. eval_studio_client/api/api/__init__.py +0 -5
  3. eval_studio_client/api/api/perturbator_service_api.py +1 -268
  4. eval_studio_client/api/api/test_service_api.py +0 -568
  5. eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +0 -1
  6. eval_studio_client/api/docs/PerturbatorServiceApi.md +3 -33
  7. eval_studio_client/api/docs/PromptGenerationServiceAutoGeneratePromptsRequest.md +1 -2
  8. eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +0 -1
  9. eval_studio_client/api/docs/TestServiceApi.md +0 -140
  10. eval_studio_client/api/docs/TestServiceGenerateTestCasesRequest.md +0 -1
  11. eval_studio_client/api/docs/TestServicePerturbTestRequest.md +0 -1
  12. eval_studio_client/api/docs/V1CreateEvaluationRequest.md +0 -1
  13. eval_studio_client/api/docs/V1ImportEvaluationRequest.md +0 -1
  14. eval_studio_client/api/docs/V1TestCase.md +0 -1
  15. eval_studio_client/api/models/__init__.py +0 -38
  16. eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +2 -8
  17. eval_studio_client/api/models/prompt_generation_service_auto_generate_prompts_request.py +3 -5
  18. eval_studio_client/api/models/required_the_test_case_to_update.py +2 -6
  19. eval_studio_client/api/models/test_service_generate_test_cases_request.py +2 -4
  20. eval_studio_client/api/models/test_service_perturb_test_request.py +2 -4
  21. eval_studio_client/api/models/v1_create_evaluation_request.py +2 -7
  22. eval_studio_client/api/models/v1_import_evaluation_request.py +2 -7
  23. eval_studio_client/api/models/v1_test_case.py +2 -6
  24. eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +2 -20
  25. eval_studio_client/api/test/test_prompt_generation_service_auto_generate_prompts_request.py +1 -4
  26. eval_studio_client/api/test/test_required_the_test_case_to_update.py +1 -4
  27. eval_studio_client/api/test/test_test_service_api.py +0 -12
  28. eval_studio_client/api/test/test_test_service_generate_test_cases_request.py +1 -4
  29. eval_studio_client/api/test/test_test_service_perturb_test_request.py +1 -4
  30. eval_studio_client/api/test/test_v1_batch_delete_test_cases_response.py +1 -4
  31. eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -20
  32. eval_studio_client/api/test/test_v1_create_test_case_response.py +1 -4
  33. eval_studio_client/api/test/test_v1_delete_test_case_response.py +1 -4
  34. eval_studio_client/api/test/test_v1_evaluation_test.py +1 -4
  35. eval_studio_client/api/test/test_v1_find_all_test_cases_by_id_response.py +1 -4
  36. eval_studio_client/api/test/test_v1_get_test_case_response.py +1 -4
  37. eval_studio_client/api/test/test_v1_import_evaluation_request.py +1 -16
  38. eval_studio_client/api/test/test_v1_list_test_cases_response.py +1 -4
  39. eval_studio_client/api/test/test_v1_test_case.py +1 -4
  40. eval_studio_client/api/test/test_v1_update_test_case_response.py +1 -4
  41. eval_studio_client/client.py +11 -9
  42. eval_studio_client/dashboards.py +0 -29
  43. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +32 -1903
  44. eval_studio_client/leaderboards.py +0 -123
  45. eval_studio_client/models.py +42 -3
  46. eval_studio_client/test_labs.py +21 -49
  47. eval_studio_client/tests.py +1 -188
  48. {eval_studio_client-1.0.1.dist-info → eval_studio_client-1.0.3a1.dist-info}/METADATA +3 -2
  49. {eval_studio_client-1.0.1.dist-info → eval_studio_client-1.0.3a1.dist-info}/RECORD +50 -179
  50. {eval_studio_client-1.0.1.dist-info → eval_studio_client-1.0.3a1.dist-info}/WHEEL +1 -1
  51. eval_studio_client/api/api/human_calibration_service_api.py +0 -304
  52. eval_studio_client/api/api/prompt_library_service_api.py +0 -669
  53. eval_studio_client/api/api/workflow_edge_service_api.py +0 -296
  54. eval_studio_client/api/api/workflow_node_service_api.py +0 -1634
  55. eval_studio_client/api/api/workflow_service_api.py +0 -1609
  56. eval_studio_client/api/docs/HumanCalibrationServiceApi.md +0 -77
  57. eval_studio_client/api/docs/PromptLibraryServiceApi.md +0 -155
  58. eval_studio_client/api/docs/ProtobufNullValue.md +0 -12
  59. eval_studio_client/api/docs/RequiredTheUpdatedWorkflow.md +0 -44
  60. eval_studio_client/api/docs/RequiredTheUpdatedWorkflowNode.md +0 -44
  61. eval_studio_client/api/docs/TestServiceImportTestCasesFromLibraryRequest.md +0 -32
  62. eval_studio_client/api/docs/TestServiceListTestCaseLibraryItemsRequest.md +0 -35
  63. eval_studio_client/api/docs/V1BatchDeleteWorkflowsRequest.md +0 -29
  64. eval_studio_client/api/docs/V1BatchDeleteWorkflowsResponse.md +0 -29
  65. eval_studio_client/api/docs/V1BatchGetWorkflowEdgesResponse.md +0 -29
  66. eval_studio_client/api/docs/V1BatchGetWorkflowNodesResponse.md +0 -29
  67. eval_studio_client/api/docs/V1CreateWorkflowResponse.md +0 -29
  68. eval_studio_client/api/docs/V1DeleteWorkflowNodeResponse.md +0 -29
  69. eval_studio_client/api/docs/V1DeleteWorkflowResponse.md +0 -29
  70. eval_studio_client/api/docs/V1EstimateThresholdRequest.md +0 -33
  71. eval_studio_client/api/docs/V1GetWorkflowNodePrerequisitesResponse.md +0 -30
  72. eval_studio_client/api/docs/V1GetWorkflowNodeResponse.md +0 -29
  73. eval_studio_client/api/docs/V1GetWorkflowResponse.md +0 -29
  74. eval_studio_client/api/docs/V1ImportTestCasesFromLibraryResponse.md +0 -29
  75. eval_studio_client/api/docs/V1ImportTestCasesRequest.md +0 -33
  76. eval_studio_client/api/docs/V1LabeledTestCase.md +0 -31
  77. eval_studio_client/api/docs/V1ListPromptLibraryItemsResponse.md +0 -29
  78. eval_studio_client/api/docs/V1ListTestCaseLibraryItemsResponse.md +0 -29
  79. eval_studio_client/api/docs/V1ListWorkflowsResponse.md +0 -29
  80. eval_studio_client/api/docs/V1ProcessWorkflowNodeResponse.md +0 -29
  81. eval_studio_client/api/docs/V1PromptLibraryItem.md +0 -42
  82. eval_studio_client/api/docs/V1TestSuiteEvaluates.md +0 -11
  83. eval_studio_client/api/docs/V1UpdateWorkflowNodeResponse.md +0 -29
  84. eval_studio_client/api/docs/V1UpdateWorkflowResponse.md +0 -29
  85. eval_studio_client/api/docs/V1Workflow.md +0 -46
  86. eval_studio_client/api/docs/V1WorkflowEdge.md +0 -40
  87. eval_studio_client/api/docs/V1WorkflowEdgeType.md +0 -12
  88. eval_studio_client/api/docs/V1WorkflowNode.md +0 -46
  89. eval_studio_client/api/docs/V1WorkflowNodeArtifact.md +0 -40
  90. eval_studio_client/api/docs/V1WorkflowNodeArtifacts.md +0 -29
  91. eval_studio_client/api/docs/V1WorkflowNodeAttributes.md +0 -30
  92. eval_studio_client/api/docs/V1WorkflowNodeStatus.md +0 -12
  93. eval_studio_client/api/docs/V1WorkflowNodeType.md +0 -12
  94. eval_studio_client/api/docs/V1WorkflowNodeView.md +0 -12
  95. eval_studio_client/api/docs/V1WorkflowType.md +0 -12
  96. eval_studio_client/api/docs/WorkflowEdgeServiceApi.md +0 -76
  97. eval_studio_client/api/docs/WorkflowNodeServiceApi.md +0 -423
  98. eval_studio_client/api/docs/WorkflowServiceApi.md +0 -417
  99. eval_studio_client/api/models/protobuf_null_value.py +0 -36
  100. eval_studio_client/api/models/required_the_updated_workflow.py +0 -152
  101. eval_studio_client/api/models/required_the_updated_workflow_node.py +0 -152
  102. eval_studio_client/api/models/test_service_import_test_cases_from_library_request.py +0 -93
  103. eval_studio_client/api/models/test_service_list_test_case_library_items_request.py +0 -99
  104. eval_studio_client/api/models/v1_batch_delete_workflows_request.py +0 -87
  105. eval_studio_client/api/models/v1_batch_delete_workflows_response.py +0 -95
  106. eval_studio_client/api/models/v1_batch_get_workflow_edges_response.py +0 -95
  107. eval_studio_client/api/models/v1_batch_get_workflow_nodes_response.py +0 -95
  108. eval_studio_client/api/models/v1_create_workflow_response.py +0 -91
  109. eval_studio_client/api/models/v1_delete_workflow_node_response.py +0 -91
  110. eval_studio_client/api/models/v1_delete_workflow_response.py +0 -91
  111. eval_studio_client/api/models/v1_estimate_threshold_request.py +0 -103
  112. eval_studio_client/api/models/v1_get_workflow_node_prerequisites_response.py +0 -89
  113. eval_studio_client/api/models/v1_get_workflow_node_response.py +0 -91
  114. eval_studio_client/api/models/v1_get_workflow_response.py +0 -91
  115. eval_studio_client/api/models/v1_import_test_cases_from_library_response.py +0 -91
  116. eval_studio_client/api/models/v1_import_test_cases_request.py +0 -95
  117. eval_studio_client/api/models/v1_labeled_test_case.py +0 -91
  118. eval_studio_client/api/models/v1_list_prompt_library_items_response.py +0 -95
  119. eval_studio_client/api/models/v1_list_test_case_library_items_response.py +0 -95
  120. eval_studio_client/api/models/v1_list_workflows_response.py +0 -95
  121. eval_studio_client/api/models/v1_process_workflow_node_response.py +0 -91
  122. eval_studio_client/api/models/v1_prompt_library_item.py +0 -129
  123. eval_studio_client/api/models/v1_test_suite_evaluates.py +0 -39
  124. eval_studio_client/api/models/v1_update_workflow_node_response.py +0 -91
  125. eval_studio_client/api/models/v1_update_workflow_response.py +0 -91
  126. eval_studio_client/api/models/v1_workflow.py +0 -156
  127. eval_studio_client/api/models/v1_workflow_edge.py +0 -123
  128. eval_studio_client/api/models/v1_workflow_edge_type.py +0 -37
  129. eval_studio_client/api/models/v1_workflow_node.py +0 -156
  130. eval_studio_client/api/models/v1_workflow_node_artifact.py +0 -122
  131. eval_studio_client/api/models/v1_workflow_node_artifacts.py +0 -97
  132. eval_studio_client/api/models/v1_workflow_node_attributes.py +0 -87
  133. eval_studio_client/api/models/v1_workflow_node_status.py +0 -40
  134. eval_studio_client/api/models/v1_workflow_node_type.py +0 -41
  135. eval_studio_client/api/models/v1_workflow_node_view.py +0 -38
  136. eval_studio_client/api/models/v1_workflow_type.py +0 -37
  137. eval_studio_client/api/test/test_human_calibration_service_api.py +0 -38
  138. eval_studio_client/api/test/test_prompt_library_service_api.py +0 -43
  139. eval_studio_client/api/test/test_protobuf_null_value.py +0 -33
  140. eval_studio_client/api/test/test_required_the_updated_workflow.py +0 -88
  141. eval_studio_client/api/test/test_required_the_updated_workflow_node.py +0 -80
  142. eval_studio_client/api/test/test_test_service_import_test_cases_from_library_request.py +0 -56
  143. eval_studio_client/api/test/test_test_service_list_test_case_library_items_request.py +0 -63
  144. eval_studio_client/api/test/test_v1_batch_delete_workflows_request.py +0 -53
  145. eval_studio_client/api/test/test_v1_batch_delete_workflows_response.py +0 -92
  146. eval_studio_client/api/test/test_v1_batch_get_workflow_edges_response.py +0 -64
  147. eval_studio_client/api/test/test_v1_batch_get_workflow_nodes_response.py +0 -84
  148. eval_studio_client/api/test/test_v1_create_workflow_response.py +0 -90
  149. eval_studio_client/api/test/test_v1_delete_workflow_node_response.py +0 -82
  150. eval_studio_client/api/test/test_v1_delete_workflow_response.py +0 -90
  151. eval_studio_client/api/test/test_v1_estimate_threshold_request.py +0 -60
  152. eval_studio_client/api/test/test_v1_get_workflow_node_prerequisites_response.py +0 -56
  153. eval_studio_client/api/test/test_v1_get_workflow_node_response.py +0 -82
  154. eval_studio_client/api/test/test_v1_get_workflow_response.py +0 -90
  155. eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +0 -71
  156. eval_studio_client/api/test/test_v1_import_test_cases_request.py +0 -57
  157. eval_studio_client/api/test/test_v1_labeled_test_case.py +0 -53
  158. eval_studio_client/api/test/test_v1_list_prompt_library_items_response.py +0 -71
  159. eval_studio_client/api/test/test_v1_list_test_case_library_items_response.py +0 -71
  160. eval_studio_client/api/test/test_v1_list_workflows_response.py +0 -92
  161. eval_studio_client/api/test/test_v1_process_workflow_node_response.py +0 -71
  162. eval_studio_client/api/test/test_v1_prompt_library_item.py +0 -68
  163. eval_studio_client/api/test/test_v1_test_suite_evaluates.py +0 -33
  164. eval_studio_client/api/test/test_v1_update_workflow_node_response.py +0 -82
  165. eval_studio_client/api/test/test_v1_update_workflow_response.py +0 -90
  166. eval_studio_client/api/test/test_v1_workflow.py +0 -89
  167. eval_studio_client/api/test/test_v1_workflow_edge.py +0 -61
  168. eval_studio_client/api/test/test_v1_workflow_edge_type.py +0 -33
  169. eval_studio_client/api/test/test_v1_workflow_node.py +0 -81
  170. eval_studio_client/api/test/test_v1_workflow_node_artifact.py +0 -61
  171. eval_studio_client/api/test/test_v1_workflow_node_artifacts.py +0 -64
  172. eval_studio_client/api/test/test_v1_workflow_node_attributes.py +0 -51
  173. eval_studio_client/api/test/test_v1_workflow_node_status.py +0 -33
  174. eval_studio_client/api/test/test_v1_workflow_node_type.py +0 -33
  175. eval_studio_client/api/test/test_v1_workflow_node_view.py +0 -33
  176. eval_studio_client/api/test/test_v1_workflow_type.py +0 -33
  177. eval_studio_client/api/test/test_workflow_edge_service_api.py +0 -38
  178. eval_studio_client/api/test/test_workflow_node_service_api.py +0 -73
  179. eval_studio_client/api/test/test_workflow_service_api.py +0 -73
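
Most of the 179 changed files are regenerated OpenAPI surface: the workflow, prompt library, and human calibration service APIs, together with their models, docs, and tests, are removed in 1.0.3a1. The hand-written client changes are concentrated in leaderboards.py, models.py, test_labs.py, and tests.py and are shown in the hunks below: `TestLab.evaluate` now takes a single evaluator and returns an `Optional[Leaderboard]` instead of a `Dashboard`, and the leaderboard result accessors `download_result` / `get_result_json` are removed. A minimal sketch of the resulting 1.0.3a1 flow, assuming `lab` (a `TestLab`) and `evaluator` (an `Evaluator`) were created or fetched elsewhere (how they are obtained is outside this diff):

```python
# Sketch of the evaluation flow implied by this diff; `lab` (TestLab) and
# `evaluator` (Evaluator) are assumed to exist already.
leaderboard = lab.evaluate(evaluator)  # 1.0.1: lab.evaluate([ev1, ev2], ...) -> Dashboard
if leaderboard is None:
    raise RuntimeError("Launching the evaluation failed.")

# Once the leaderboard is finished, the report and table accessors remain;
# the JSON accessors download_result() / get_result_json() no longer exist.
if leaderboard.finished:
    leaderboard.download_report("reports/")  # destination path is illustrative
    table = leaderboard.get_table()
```
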
eval_studio_client/leaderboards.py
@@ -1,7 +1,6 @@
 import dataclasses
 import datetime
 import json
-import os
 import time
 from typing import Dict
 from typing import List
@@ -34,7 +33,6 @@ class Leaderboard:
     update_time: Optional[datetime.datetime] = None
     problems: List[p6s.Problem] = dataclasses.field(default_factory=list)
     insights: List[i6s.Insight] = dataclasses.field(default_factory=list)
-    summary: Optional[str] = None
     existing_collection: Optional[str] = None
     _report: Optional[str] = None
     _leaderboard: Optional[str] = None
@@ -88,42 +86,6 @@ class Leaderboard:
         if self._client:
             self._leaderboard_api.leaderboard_service_delete_leaderboard(self.key)
 
-    def download_result(self, dest: str):
-        """Downloads the leaderboard result to a JSON file.
-
-        Args:
-            dest (str): The destination path for the report.
-        """
-        if not os.path.exists(dest):
-            raise ValueError("Destination path does not exist.")
-
-        if os.path.isdir(dest):
-            dest = os.path.join(dest, "results.json")
-
-        if self._client and self.finished:
-            headers: Dict[str, str] = {}
-            url = urljoin(
-                self._client.configuration.host, f"/content/{self.key}/results"
-            )
-            self._client.update_params_for_auth(
-                headers=headers,
-                queries=[],
-                auth_settings=[],
-                resource_path=url,
-                method="GET",
-                body=None,
-            )
-            response = urllib3.request("GET", url, headers=headers)
-
-            if response.status == 200:
-                with open(dest, "wb") as f:
-                    f.write(response.data)
-                return
-            else:
-                raise RuntimeError("Failed to retrieve leaderboard result.")
-
-        raise ValueError("Cannot download result for unfinished leaderboard.")
-
     def download_report(self, dest: str):
         """Downloads the leaderboard report to a zip file.
 
@@ -152,30 +114,6 @@ class Leaderboard:
 
         raise ValueError("Cannot download report for unfinished leaderboard.")
 
-    def get_result_json(self) -> str:
-        """Retrieves the leaderboard result as a JSON string."""
-        if self._client and self.finished:
-            headers: Dict[str, str] = {}
-            url = urljoin(
-                self._client.configuration.host, f"/content/{self.key}/results"
-            )
-            self._client.update_params_for_auth(
-                headers=headers,
-                queries=[],
-                auth_settings=[],
-                resource_path=url,
-                method="GET",
-                body=None,
-            )
-            response = urllib3.request("GET", url, headers=headers)
-
-            if response.status == 200:
-                return str(response.data)
-            else:
-                raise RuntimeError("Failed to retrieve leaderboard result.")
-
-        raise ValueError("Cannot download result for unfinished leaderboard.")
-
     def get_table(self) -> LeaderboardTable:
         """Retrieves the leaderboard table."""
         if self._client and self.finished:
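
Both raw-result accessors are gone in 1.0.3a1: `download_result` (previous hunk) and `get_result_json` (this one). Both wrapped the same `GET /content/{key}/results` request. If that endpoint is still served by the Eval Studio host (this diff does not establish that), the removed logic can be reproduced outside the client roughly as follows; an authenticated `api.ApiClient` named `api_client` and a finished `leaderboard` are assumed:

```python
# Sketch reconstructed from the removed download_result() / get_result_json():
# the raw results JSON was fetched from "/content/{leaderboard.key}/results".
from typing import Dict
from urllib.parse import urljoin

import urllib3

headers: Dict[str, str] = {}
url = urljoin(api_client.configuration.host, f"/content/{leaderboard.key}/results")
api_client.update_params_for_auth(
    headers=headers,
    queries=[],
    auth_settings=[],
    resource_path=url,
    method="GET",
    body=None,
)
response = urllib3.request("GET", url, headers=headers)
if response.status != 200:
    raise RuntimeError("Failed to retrieve leaderboard result.")
results_json = response.data.decode()
```
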
@@ -232,7 +170,6 @@ class Leaderboard:
         """Refresh the leaderboard with the latest API data."""
         self.key = api_leaderboard.name or ""
         self.update_time = api_leaderboard.update_time
-        self.summary = api_leaderboard.leaderboard_summary
         self._leaderboard = api_leaderboard.leaderboard_table
         self._report = api_leaderboard.leaderboard_report or ""
         self._status = api_leaderboard.status
@@ -255,7 +192,6 @@ class Leaderboard:
             update_time=api_leaderboard.update_time,
             problems=problems,
             insights=insights,
-            summary=api_leaderboard.leaderboard_summary,
             existing_collection=api_leaderboard.h2ogpte_collection or None,
             _evaluator_name=api_leaderboard.evaluator or "",
             _test_names=api_leaderboard.tests or [],
@@ -272,62 +208,3 @@ class Leaderboard:
             models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
             models.V1LeaderboardStatus.LEADERBOARD_STATUS_FAILED,
         ]
-
-    @staticmethod
-    def from_operation(
-        operation: models.V1Operation, client: Optional[api.ApiClient]
-    ) -> Optional["Leaderboard"]:
-        """Retrieves the leaderboard from the operation, which created it.
-
-        Args:
-            operation: The operation that created the dashboard.
-            client: The API client to use for the leaderboard retrieval.
-
-        Returns:
-            Leaderboard: The leaderboard instance created by the operation.
-        """
-        if not client:
-            raise RuntimeError("API Client is not provided")
-
-        if not operation.metadata:
-            raise RuntimeError(
-                "Operation metadata missing, it's not possible to retrieve leaderboard from operation"
-            )
-
-        leaderboard_api = api.LeaderboardServiceApi(client)
-        leadeboard_id = operation.metadata.to_dict().get("leaderboard", "")
-        res = leaderboard_api.leaderboard_service_get_leaderboard(str(leadeboard_id))
-        if res and res.leaderboard:
-            return Leaderboard._from_api_leaderboard(res.leaderboard, client)
-
-        return None
-
-
-class _Leaderboards:
-    def __init__(self, client: api.ApiClient):
-        self._client = client
-        self._api = api.LeaderboardServiceApi(client)
-
-    def get(self, key: str) -> Leaderboard:
-        """Gets an individual leaderboard with a given key from Eval Studio.
-
-        Args:
-            key: The leaderboard resource name to retrieve.
-        """
-        res = self._api.leaderboard_service_get_leaderboard(key)
-        if res and res.leaderboard:
-            return Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
-
-        raise KeyError("Leaderboard not found.")
-
-    def list(self) -> List[Leaderboard]:
-        """Lists all user leaderboards in Eval Studio."""
-        res = self._api.leaderboard_service_list_leaderboards()
-        if res:
-            res_leaderboards = res.leaderboards or []
-            return [
-                Leaderboard._from_api_leaderboard(lb, self._client)
-                for lb in res_leaderboards
-            ]
-
-        return []
eval_studio_client/models.py
@@ -168,7 +168,7 @@ class Model:
         )
 
         if res and res.operation:
-            return l10s.Leaderboard.from_operation(res.operation, self._client)
+            return self._get_leaderboard_from_operation(res.operation)
 
         return None
 
@@ -226,7 +226,7 @@ class Model:
         )
 
         if res and res.operation:
-            return d8s.Dashboard.from_operation(res.operation, self._client)
+            return self._get_dashboard_from_operation(res.operation)
 
         return None
 
@@ -257,7 +257,7 @@ class Model:
         )
         res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
         if res and res.operation:
-            return l10s.Leaderboard.from_operation(res.operation, self._client)
+            return self._get_leaderboard_from_operation(res.operation)
 
         return None
 
@@ -273,6 +273,45 @@ class Model:
 
         raise RuntimeError("Failed to list base models")
 
+    def _get_leaderboard_from_operation(
+        self, operation: models.V1Operation
+    ) -> Optional[l10s.Leaderboard]:
+        """Retrieves the leaderboard from the operation, which created it.
+
+        Args:
+            operation: The operation that created the leaderboard.
+        """
+        if not operation.metadata:
+            raise RuntimeError("Not possible to retrieve leaderboard from operation")
+
+        leadeboard_id = operation.metadata.to_dict().get("leaderboard")
+        res = self._leaderboard_api.leaderboard_service_get_leaderboard(leadeboard_id)
+        if res and res.leaderboard:
+            return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
+
+        return None
+
+    def _get_dashboard_from_operation(
+        self, operation: models.V1Operation
+    ) -> Optional[d8s.Dashboard]:
+        """Retrieves the dashboard from the operation, which created it.
+
+        Args:
+            operation: The operation that created the dashboard.
+        """
+        if not self._client:
+            raise RuntimeError("Client is not set.")
+
+        if not operation.metadata:
+            raise RuntimeError("Not possible to retrieve dashboard from operation")
+
+        dashboard_id = operation.metadata.to_dict().get("dashboard")
+        res = self._dashboard_api.dashboard_service_get_dashboard(dashboard_id)
+        if res and res.dashboard:
+            return d8s.Dashboard._from_api_dashboard(res.dashboard, self._client)
+
+        return None
+
     @staticmethod
     def _from_api_model(api_model: models.V1Model, client: api.ApiClient) -> "Model":
         """Converts the API model to the client model."""
eval_studio_client/test_labs.py
@@ -7,8 +7,7 @@ from typing import Union
 import uuid
 
 from eval_studio_client import api
-from eval_studio_client import dashboards
-from eval_studio_client import evaluators as e8s
+from eval_studio_client import evaluators
 from eval_studio_client import leaderboards as l10s
 from eval_studio_client.api import models as apiModels
 
@@ -93,56 +92,11 @@ class TestLab:
         self._models.append(_m)
         return _m
 
-    def evaluate(
-        self,
-        evaluators: Union[e8s.Evaluator, List[e8s.Evaluator]],
-        name: Optional[str] = None,
-        description: Optional[str] = None,
-    ) -> Optional[dashboards.Dashboard]:
+    def evaluate(self, evaluator: evaluators.Evaluator) -> Optional[l10s.Leaderboard]:
         """Runs an evaluation for the test lab.
 
-        Args:
-            evaluators (Union[e8s.Evaluator, List[e8s.Evaluator]]): One or many evaluators
-                used to evaluate the test lab.
-            name (str, optional): Optional name for the evaluation.
-            description (str, optional): Optional description for the evaluation.
-
-        Returns:
-            Dashboard: Evaluation dashboard instance. In case launching of evaluation
-                fails, `None` is returned.
-        """
-        _evaluators = (
-            [evaluators] if isinstance(evaluators, e8s.Evaluator) else evaluators
-        )
-        name = name or self.name or "Imported Dashboard"
-        description = description or self.description or ""
-        req = apiModels.V1BatchImportLeaderboardRequest(
-            testLabJson=self.json(),
-            evaluators=[e.key for e in _evaluators],
-            model=None,
-            dashboardDisplayName=name,
-            dashboardDescription=description,
-            testDisplayName=f"{name} - Test",
-            testDescription=f"Test suite for {description}",
-        )
-        res = self._leaderboard_api.leaderboard_service_batch_import_leaderboard(req)
-
-        if res and res.operation:
-            return dashboards.Dashboard.from_operation(res.operation, self._client)
-
-        return None
-
-    def create_leaderboard(
-        self, evaluator: e8s.Evaluator
-    ) -> Optional[l10s.Leaderboard]:
-        """Creates a single leaderboard for the test lab.
-
         Args:
             evaluator: The evaluator to use for the evaluation.
-
-        Returns:
-            Leaderboard: Single evaluation leaderboard instance.
-                In case launching of evaluation fails, `None` is returned.
         """
         req = apiModels.V1ImportLeaderboardRequest(
             testLabJson=self.json(),
@@ -155,7 +109,7 @@ class TestLab:
         )
         res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
         if res and res.operation:
-            return l10s.Leaderboard.from_operation(res.operation, self._client)
+            return self._get_leaderboard_from_operation(res.operation)
 
         return None
 
@@ -177,6 +131,24 @@ class TestLab:
 
         return json.dumps(lab, indent=4, sort_keys=True)
 
+    def _get_leaderboard_from_operation(
+        self, operation: apiModels.V1Operation
+    ) -> Optional[l10s.Leaderboard]:
+        """Retrieves the leaderboard from the operation, which created it.
+
+        Args:
+            operation: The operation that created the leaderboard.
+        """
+        if not operation.metadata:
+            raise RuntimeError("Not possible to retrieve leaderboard from operation")
+
+        leadeboard_id = operation.metadata.to_dict().get("leaderboard")
+        res = self._leaderboard_api.leaderboard_service_get_leaderboard(leadeboard_id)
+        if res and res.leaderboard:
+            return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
+
+        return None
+
     def _llm_model_names(self) -> List[str]:
         return [m.llm_model_name for m in self.models]
 
eval_studio_client/tests.py
@@ -118,71 +118,6 @@ class _TestCaseGenerationHandle:
         )
 
 
-@dataclasses.dataclass
-class _TestCaseLibraryGetHandle(_TestCaseGenerationHandle):
-
-    @staticmethod
-    def _from_operation(
-        res: (
-            models.V1ImportTestCasesFromLibraryResponse | models.V1GetOperationResponse
-        ),
-    ) -> "_TestCaseLibraryGetHandle":
-        """Converts an API operation to prompt library handle."""
-        op: models.V1Operation | None = res.operation
-        if not op:
-            return _TestCaseLibraryGetHandle(name=None)
-
-        # progress
-        if hasattr(op, "metadata") and op.metadata:
-            meta_dict = op.metadata.to_dict() or {}
-        else:
-            meta_dict = {}
-
-        return _TestCaseLibraryGetHandle(
-            name=op.name,
-            progress=meta_dict.get("progress"),
-            progress_message=meta_dict.get("progressMessage"),
-            error=op.error,
-            done=op.done,
-        )
-
-
-@dataclasses.dataclass
-class TestCaseLibraryItem:
-    """Represents a single test case library item - test suite."""
-
-    key: str
-    name: str
-    description: str
-    test_suite_url: str
-    test_count: int
-    test_case_count: int
-    evaluates: List[str]
-    categories: List[str]
-
-    @staticmethod
-    def _from_api_items(
-        api_items: List[models.V1PromptLibraryItem],
-    ) -> List["TestCaseLibraryItem"]:
-        return (
-            [
-                TestCaseLibraryItem(
-                    key=api_item.name or "",
-                    name=api_item.display_name or "",
-                    description=api_item.description or "",
-                    test_suite_url=api_item.test_suite_url or "",
-                    test_count=api_item.test_count or 0,
-                    test_case_count=api_item.test_case_count or 0,
-                    evaluates=list(api_item.evaluates) if api_item.evaluates else [],
-                    categories=list(api_item.categories) if api_item.categories else [],
-                )
-                for api_item in api_items
-            ]
-            if api_items
-            else []
-        )
-
-
 @dataclasses.dataclass
 class TestCase:
     """Represents a single test case, which contains tested prompt, expected answer
@@ -248,7 +183,6 @@ class Test:
     update_time: Optional[datetime.datetime] = None
     _client: Optional[api.ApiClient] = None
     _gen_tc_op_name: Optional[str] = None
-    _lib_tc_op_name: Optional[str] = None
 
     def __post_init__(self):
         if self._client:
@@ -333,7 +267,7 @@ class Test:
 
         Args:
             count (int): Number of test cases to generate (generator may return fewer
-            prompts).
+                prompts).
             model (str): Model to use for generating the prompts.
             base_llm_model (str): Base LLM model to use for generating the prompts.
             generators (List[TestCaseGenerator]): Methods to use for generation.
@@ -408,127 +342,6 @@ class Test:
 
         raise TimeoutError("Waiting timeout has been reached.")
 
-    def list_test_suite_library_items(
-        self,
-        filter_by_categories: Optional[List[str]] = None,
-        filter_by_purposes: Optional[List[str]] = None,
-        filter_by_evaluates: Optional[List[str]] = None,
-        filter_by_origin: Optional[str] = None,
-        filter_by_test_case_count: Optional[int] = None,
-        filter_by_test_count: Optional[int] = None,
-        filter_by_fts: Optional[str] = None,
-    ) -> List[TestCaseLibraryItem]:
-        """Retrieves a list of all available items - suites of tests - in the library.
-
-        Args:
-            filter_by_categories (List[str]): List of categories to filter
-                the library items.
-            filter_by_purposes (List[str]): List of purposes to filter
-                the library items.
-            filter_by_evaluates (List[str]): List of evaluates to filter
-                the library items.
-            filter_by_origin (str): Origin to filter the library items.
-            filter_by_test_case_count (int): Test case count to filter
-                the library items.
-            filter_by_test_count (int): Test count to filter the library items.
-            filter_by_fts (str): FTS to filter the library items - phrase to search for.
-
-        Returns:
-            List[TestCaseLibraryItem]: List of library items.
-        """
-        req = models.TestServiceListTestCaseLibraryItemsRequest(
-            filter_by_categories=filter_by_categories,
-            filter_by_purposes=filter_by_purposes,
-            filter_by_evaluates=filter_by_evaluates,
-            filter_by_origin=filter_by_origin,
-            filter_by_test_case_count=filter_by_test_case_count,
-            filter_by_test_count=filter_by_test_count,
-            filter_by_fts=filter_by_fts,
-        )
-
-        res = self._test_api.test_service_list_test_case_library_items(self.key, req)
-        if res and res.prompt_library_items:
-            return TestCaseLibraryItem._from_api_items(res.prompt_library_items)
-
-        return []
-
-    def add_library_test_cases(
-        self, test_suite_url: str, count: int, test_document_urls: Optional[List[str]]
-    ) -> None:
-        """Sample test cases from the test suite library and add them to the test.
-
-        Args:
-            test_suite_url (str): The URL of the library test suite to get TestCases
-                from (sample).
-            count (int): The number of TestCases to get from the library.
-            test_document_urls (List[str]): The list of target Test corpus
-                document URLs to skip when returning library TestCases corpus.
-        """
-        req = models.TestServiceImportTestCasesFromLibraryRequest(
-            test_suite_url=test_suite_url,
-            count=count,
-            test_document_urls=test_document_urls,
-        )
-
-        res = self._test_api.test_service_import_test_cases_from_library(self.key, req)
-
-        op: models.V1Operation | None = res.operation
-        self._lib_tc_op_name = op.name if op else None
-
-    def wait_for_library_test_case_get(
-        self, timeout: Optional[float] = None, verbose: bool = False
-    ) -> None:
-        """Waits for the library test cases(s) sampling to finish.
-
-        Args:
-            timeout (float): The maximum time to wait in seconds.
-            verbose (bool): If True, prints the status of the handle while waiting.
-        """
-        if not self._lib_tc_op_name:
-            raise ValueError(
-                "There is no ongoing getting of test case(s) from the library - "
-                "the operation name is not set."
-            )
-
-        if verbose:
-            print(
-                f"Waiting for getting library test case(s) operation to finish "
-                f"({self._lib_tc_op_name}):"
-            )
-        if self._client:
-            # exponential backoff
-            wait_time = 1.0
-            wait_coef = 1.6
-            wait_max = 8.0
-            wait_total = 0.0
-            timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
-            progress_bar = utils.ProgressBar()
-            while wait_total < timeout:
-                handle = _TestCaseLibraryGetHandle._from_operation(
-                    self._operation_api.operation_service_get_operation(
-                        self._lib_tc_op_name
-                    )
-                )
-
-                if verbose:
-                    progress_bar.update(handle.progress or 0, handle.progress_message)
-
-                if handle.done:
-                    if handle.error:
-                        raise RuntimeError(
-                            f"Getting of library test case(s) failed: {handle.error}"
-                        )
-                    return
-
-                wait_time *= wait_coef
-                time.sleep(min(wait_time, wait_max))
-        else:
-            raise ValueError(
-                "Unable to establish a connection to the Eval Studio host."
-            )
-
-        raise TimeoutError("Waiting timeout has been reached.")
-
     def delete(self, force=False):
         """Deletes the test.
 
{eval_studio_client-1.0.1.dist-info → eval_studio_client-1.0.3a1.dist-info}/METADATA
@@ -1,9 +1,10 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: eval-studio-client
-Version: 1.0.1
+Version: 1.0.3a1
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
+License-Expression: MIT
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.9