eval-studio-client 1.0.0a1__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. eval_studio_client/api/__init__.py +43 -0
  2. eval_studio_client/api/api/__init__.py +5 -0
  3. eval_studio_client/api/api/human_calibration_service_api.py +304 -0
  4. eval_studio_client/api/api/perturbator_service_api.py +268 -1
  5. eval_studio_client/api/api/prompt_library_service_api.py +669 -0
  6. eval_studio_client/api/api/test_service_api.py +568 -0
  7. eval_studio_client/api/api/workflow_edge_service_api.py +296 -0
  8. eval_studio_client/api/api/workflow_node_service_api.py +1634 -0
  9. eval_studio_client/api/api/workflow_service_api.py +1609 -0
  10. eval_studio_client/api/docs/HumanCalibrationServiceApi.md +77 -0
  11. eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +1 -0
  12. eval_studio_client/api/docs/PerturbatorServiceApi.md +33 -3
  13. eval_studio_client/api/docs/PromptGenerationServiceAutoGeneratePromptsRequest.md +2 -1
  14. eval_studio_client/api/docs/PromptLibraryServiceApi.md +155 -0
  15. eval_studio_client/api/docs/ProtobufNullValue.md +12 -0
  16. eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +1 -0
  17. eval_studio_client/api/docs/RequiredTheUpdatedWorkflow.md +44 -0
  18. eval_studio_client/api/docs/RequiredTheUpdatedWorkflowNode.md +44 -0
  19. eval_studio_client/api/docs/TestServiceApi.md +140 -0
  20. eval_studio_client/api/docs/TestServiceGenerateTestCasesRequest.md +1 -0
  21. eval_studio_client/api/docs/TestServiceImportTestCasesFromLibraryRequest.md +32 -0
  22. eval_studio_client/api/docs/TestServiceListTestCaseLibraryItemsRequest.md +35 -0
  23. eval_studio_client/api/docs/TestServicePerturbTestRequest.md +1 -0
  24. eval_studio_client/api/docs/V1BatchDeleteWorkflowsRequest.md +29 -0
  25. eval_studio_client/api/docs/V1BatchDeleteWorkflowsResponse.md +29 -0
  26. eval_studio_client/api/docs/V1BatchGetWorkflowEdgesResponse.md +29 -0
  27. eval_studio_client/api/docs/V1BatchGetWorkflowNodesResponse.md +29 -0
  28. eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
  29. eval_studio_client/api/docs/V1CreateWorkflowResponse.md +29 -0
  30. eval_studio_client/api/docs/V1DeleteWorkflowNodeResponse.md +29 -0
  31. eval_studio_client/api/docs/V1DeleteWorkflowResponse.md +29 -0
  32. eval_studio_client/api/docs/V1EstimateThresholdRequest.md +33 -0
  33. eval_studio_client/api/docs/V1GetWorkflowNodePrerequisitesResponse.md +30 -0
  34. eval_studio_client/api/docs/V1GetWorkflowNodeResponse.md +29 -0
  35. eval_studio_client/api/docs/V1GetWorkflowResponse.md +29 -0
  36. eval_studio_client/api/docs/V1ImportEvaluationRequest.md +1 -0
  37. eval_studio_client/api/docs/V1ImportTestCasesFromLibraryResponse.md +29 -0
  38. eval_studio_client/api/docs/V1ImportTestCasesRequest.md +33 -0
  39. eval_studio_client/api/docs/V1LabeledTestCase.md +31 -0
  40. eval_studio_client/api/docs/V1ListPromptLibraryItemsResponse.md +29 -0
  41. eval_studio_client/api/docs/V1ListTestCaseLibraryItemsResponse.md +29 -0
  42. eval_studio_client/api/docs/V1ListWorkflowsResponse.md +29 -0
  43. eval_studio_client/api/docs/V1ProcessWorkflowNodeResponse.md +29 -0
  44. eval_studio_client/api/docs/V1PromptLibraryItem.md +42 -0
  45. eval_studio_client/api/docs/V1TestCase.md +1 -0
  46. eval_studio_client/api/docs/V1TestSuiteEvaluates.md +11 -0
  47. eval_studio_client/api/docs/V1UpdateWorkflowNodeResponse.md +29 -0
  48. eval_studio_client/api/docs/V1UpdateWorkflowResponse.md +29 -0
  49. eval_studio_client/api/docs/V1Workflow.md +46 -0
  50. eval_studio_client/api/docs/V1WorkflowEdge.md +40 -0
  51. eval_studio_client/api/docs/V1WorkflowEdgeType.md +12 -0
  52. eval_studio_client/api/docs/V1WorkflowNode.md +46 -0
  53. eval_studio_client/api/docs/V1WorkflowNodeArtifact.md +40 -0
  54. eval_studio_client/api/docs/V1WorkflowNodeArtifacts.md +29 -0
  55. eval_studio_client/api/docs/V1WorkflowNodeAttributes.md +30 -0
  56. eval_studio_client/api/docs/V1WorkflowNodeStatus.md +12 -0
  57. eval_studio_client/api/docs/V1WorkflowNodeType.md +12 -0
  58. eval_studio_client/api/docs/V1WorkflowNodeView.md +12 -0
  59. eval_studio_client/api/docs/V1WorkflowType.md +12 -0
  60. eval_studio_client/api/docs/WorkflowEdgeServiceApi.md +76 -0
  61. eval_studio_client/api/docs/WorkflowNodeServiceApi.md +423 -0
  62. eval_studio_client/api/docs/WorkflowServiceApi.md +417 -0
  63. eval_studio_client/api/models/__init__.py +38 -0
  64. eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +8 -2
  65. eval_studio_client/api/models/prompt_generation_service_auto_generate_prompts_request.py +5 -3
  66. eval_studio_client/api/models/protobuf_null_value.py +36 -0
  67. eval_studio_client/api/models/required_the_test_case_to_update.py +6 -2
  68. eval_studio_client/api/models/required_the_updated_workflow.py +152 -0
  69. eval_studio_client/api/models/required_the_updated_workflow_node.py +152 -0
  70. eval_studio_client/api/models/test_service_generate_test_cases_request.py +4 -2
  71. eval_studio_client/api/models/test_service_import_test_cases_from_library_request.py +93 -0
  72. eval_studio_client/api/models/test_service_list_test_case_library_items_request.py +99 -0
  73. eval_studio_client/api/models/test_service_perturb_test_request.py +4 -2
  74. eval_studio_client/api/models/v1_batch_delete_workflows_request.py +87 -0
  75. eval_studio_client/api/models/v1_batch_delete_workflows_response.py +95 -0
  76. eval_studio_client/api/models/v1_batch_get_workflow_edges_response.py +95 -0
  77. eval_studio_client/api/models/v1_batch_get_workflow_nodes_response.py +95 -0
  78. eval_studio_client/api/models/v1_create_evaluation_request.py +7 -2
  79. eval_studio_client/api/models/v1_create_workflow_response.py +91 -0
  80. eval_studio_client/api/models/v1_delete_workflow_node_response.py +91 -0
  81. eval_studio_client/api/models/v1_delete_workflow_response.py +91 -0
  82. eval_studio_client/api/models/v1_estimate_threshold_request.py +103 -0
  83. eval_studio_client/api/models/v1_get_workflow_node_prerequisites_response.py +89 -0
  84. eval_studio_client/api/models/v1_get_workflow_node_response.py +91 -0
  85. eval_studio_client/api/models/v1_get_workflow_response.py +91 -0
  86. eval_studio_client/api/models/v1_import_evaluation_request.py +7 -2
  87. eval_studio_client/api/models/v1_import_test_cases_from_library_response.py +91 -0
  88. eval_studio_client/api/models/v1_import_test_cases_request.py +95 -0
  89. eval_studio_client/api/models/v1_labeled_test_case.py +91 -0
  90. eval_studio_client/api/models/v1_list_prompt_library_items_response.py +95 -0
  91. eval_studio_client/api/models/v1_list_test_case_library_items_response.py +95 -0
  92. eval_studio_client/api/models/v1_list_workflows_response.py +95 -0
  93. eval_studio_client/api/models/v1_process_workflow_node_response.py +91 -0
  94. eval_studio_client/api/models/v1_prompt_library_item.py +129 -0
  95. eval_studio_client/api/models/v1_test_case.py +6 -2
  96. eval_studio_client/api/models/v1_test_suite_evaluates.py +39 -0
  97. eval_studio_client/api/models/v1_update_workflow_node_response.py +91 -0
  98. eval_studio_client/api/models/v1_update_workflow_response.py +91 -0
  99. eval_studio_client/api/models/v1_workflow.py +156 -0
  100. eval_studio_client/api/models/v1_workflow_edge.py +123 -0
  101. eval_studio_client/api/models/v1_workflow_edge_type.py +37 -0
  102. eval_studio_client/api/models/v1_workflow_node.py +156 -0
  103. eval_studio_client/api/models/v1_workflow_node_artifact.py +122 -0
  104. eval_studio_client/api/models/v1_workflow_node_artifacts.py +97 -0
  105. eval_studio_client/api/models/v1_workflow_node_attributes.py +87 -0
  106. eval_studio_client/api/models/v1_workflow_node_status.py +40 -0
  107. eval_studio_client/api/models/v1_workflow_node_type.py +41 -0
  108. eval_studio_client/api/models/v1_workflow_node_view.py +38 -0
  109. eval_studio_client/api/models/v1_workflow_type.py +37 -0
  110. eval_studio_client/api/test/test_human_calibration_service_api.py +38 -0
  111. eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +20 -2
  112. eval_studio_client/api/test/test_prompt_generation_service_auto_generate_prompts_request.py +4 -1
  113. eval_studio_client/api/test/test_prompt_library_service_api.py +43 -0
  114. eval_studio_client/api/test/test_protobuf_null_value.py +33 -0
  115. eval_studio_client/api/test/test_required_the_test_case_to_update.py +4 -1
  116. eval_studio_client/api/test/test_required_the_updated_workflow.py +88 -0
  117. eval_studio_client/api/test/test_required_the_updated_workflow_node.py +80 -0
  118. eval_studio_client/api/test/test_test_service_api.py +12 -0
  119. eval_studio_client/api/test/test_test_service_generate_test_cases_request.py +4 -1
  120. eval_studio_client/api/test/test_test_service_import_test_cases_from_library_request.py +56 -0
  121. eval_studio_client/api/test/test_test_service_list_test_case_library_items_request.py +63 -0
  122. eval_studio_client/api/test/test_test_service_perturb_test_request.py +4 -1
  123. eval_studio_client/api/test/test_v1_batch_delete_test_cases_response.py +4 -1
  124. eval_studio_client/api/test/test_v1_batch_delete_workflows_request.py +53 -0
  125. eval_studio_client/api/test/test_v1_batch_delete_workflows_response.py +92 -0
  126. eval_studio_client/api/test/test_v1_batch_get_workflow_edges_response.py +64 -0
  127. eval_studio_client/api/test/test_v1_batch_get_workflow_nodes_response.py +84 -0
  128. eval_studio_client/api/test/test_v1_create_evaluation_request.py +20 -2
  129. eval_studio_client/api/test/test_v1_create_test_case_response.py +4 -1
  130. eval_studio_client/api/test/test_v1_create_workflow_response.py +90 -0
  131. eval_studio_client/api/test/test_v1_delete_test_case_response.py +4 -1
  132. eval_studio_client/api/test/test_v1_delete_workflow_node_response.py +82 -0
  133. eval_studio_client/api/test/test_v1_delete_workflow_response.py +90 -0
  134. eval_studio_client/api/test/test_v1_estimate_threshold_request.py +60 -0
  135. eval_studio_client/api/test/test_v1_evaluation_test.py +4 -1
  136. eval_studio_client/api/test/test_v1_find_all_test_cases_by_id_response.py +4 -1
  137. eval_studio_client/api/test/test_v1_get_test_case_response.py +4 -1
  138. eval_studio_client/api/test/test_v1_get_workflow_node_prerequisites_response.py +56 -0
  139. eval_studio_client/api/test/test_v1_get_workflow_node_response.py +82 -0
  140. eval_studio_client/api/test/test_v1_get_workflow_response.py +90 -0
  141. eval_studio_client/api/test/test_v1_import_evaluation_request.py +16 -1
  142. eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +71 -0
  143. eval_studio_client/api/test/test_v1_import_test_cases_request.py +57 -0
  144. eval_studio_client/api/test/test_v1_labeled_test_case.py +53 -0
  145. eval_studio_client/api/test/test_v1_list_prompt_library_items_response.py +71 -0
  146. eval_studio_client/api/test/test_v1_list_test_case_library_items_response.py +71 -0
  147. eval_studio_client/api/test/test_v1_list_test_cases_response.py +4 -1
  148. eval_studio_client/api/test/test_v1_list_workflows_response.py +92 -0
  149. eval_studio_client/api/test/test_v1_process_workflow_node_response.py +71 -0
  150. eval_studio_client/api/test/test_v1_prompt_library_item.py +68 -0
  151. eval_studio_client/api/test/test_v1_test_case.py +4 -1
  152. eval_studio_client/api/test/test_v1_test_suite_evaluates.py +33 -0
  153. eval_studio_client/api/test/test_v1_update_test_case_response.py +4 -1
  154. eval_studio_client/api/test/test_v1_update_workflow_node_response.py +82 -0
  155. eval_studio_client/api/test/test_v1_update_workflow_response.py +90 -0
  156. eval_studio_client/api/test/test_v1_workflow.py +89 -0
  157. eval_studio_client/api/test/test_v1_workflow_edge.py +61 -0
  158. eval_studio_client/api/test/test_v1_workflow_edge_type.py +33 -0
  159. eval_studio_client/api/test/test_v1_workflow_node.py +81 -0
  160. eval_studio_client/api/test/test_v1_workflow_node_artifact.py +61 -0
  161. eval_studio_client/api/test/test_v1_workflow_node_artifacts.py +64 -0
  162. eval_studio_client/api/test/test_v1_workflow_node_attributes.py +51 -0
  163. eval_studio_client/api/test/test_v1_workflow_node_status.py +33 -0
  164. eval_studio_client/api/test/test_v1_workflow_node_type.py +33 -0
  165. eval_studio_client/api/test/test_v1_workflow_node_view.py +33 -0
  166. eval_studio_client/api/test/test_v1_workflow_type.py +33 -0
  167. eval_studio_client/api/test/test_workflow_edge_service_api.py +38 -0
  168. eval_studio_client/api/test/test_workflow_node_service_api.py +73 -0
  169. eval_studio_client/api/test/test_workflow_service_api.py +73 -0
  170. eval_studio_client/client.py +7 -0
  171. eval_studio_client/dashboards.py +66 -18
  172. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2665 -794
  173. eval_studio_client/leaderboards.py +125 -0
  174. eval_studio_client/models.py +3 -42
  175. eval_studio_client/test_labs.py +49 -21
  176. eval_studio_client/tests.py +221 -51
  177. eval_studio_client/utils.py +26 -0
  178. {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.1.dist-info}/METADATA +1 -2
  179. {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.1.dist-info}/RECORD +180 -50
  180. {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.1.dist-info}/WHEEL +1 -1
eval_studio_client/tests.py

@@ -11,6 +11,7 @@ from typing import Union
 from eval_studio_client import api
 from eval_studio_client import documents as d7s
 from eval_studio_client import perturbators as p10s
+from eval_studio_client import utils
 from eval_studio_client.api import models
 
 
@@ -85,15 +86,9 @@ class TestCaseGenerator(enum.Enum):
 
 
 @dataclasses.dataclass
-class TestCaseGenerationHandle:
+class _TestCaseGenerationHandle:
 
     name: Any | None
-    create_time: Optional[datetime.datetime] = None
-    creator: Optional[str] = None
-    update_time: Optional[datetime.datetime] = None
-    updater: Optional[str] = None
-    delete_time: Optional[datetime.datetime] = None
-    deleter: Optional[str] = None
     progress: Optional[float] = None
     progress_message: Optional[str] = None
     error: Optional[models.RpcStatus] = None
@@ -102,11 +97,40 @@ class TestCaseGenerationHandle:
     @staticmethod
     def _from_operation(
         res: models.V1GenerateTestCasesResponse | models.V1GetOperationResponse,
-    ) -> "TestCaseGenerationHandle":
+    ) -> "_TestCaseGenerationHandle":
         """Converts an API operation to prompt generation handle."""
         op: models.V1Operation | None = res.operation
         if not op:
-            return TestCaseGenerationHandle(name=None)
+            return _TestCaseGenerationHandle(name=None)
+
+        # progress
+        if hasattr(op, "metadata") and op.metadata:
+            meta_dict = op.metadata.to_dict() or {}
+        else:
+            meta_dict = {}
+
+        return _TestCaseGenerationHandle(
+            name=op.name,
+            progress=meta_dict.get("progress"),
+            progress_message=meta_dict.get("progressMessage"),
+            error=op.error,
+            done=op.done,
+        )
+
+
+@dataclasses.dataclass
+class _TestCaseLibraryGetHandle(_TestCaseGenerationHandle):
+
+    @staticmethod
+    def _from_operation(
+        res: (
+            models.V1ImportTestCasesFromLibraryResponse | models.V1GetOperationResponse
+        ),
+    ) -> "_TestCaseLibraryGetHandle":
+        """Converts an API operation to prompt library handle."""
+        op: models.V1Operation | None = res.operation
+        if not op:
+            return _TestCaseLibraryGetHandle(name=None)
 
         # progress
         if hasattr(op, "metadata") and op.metadata:
@@ -114,14 +138,8 @@ class TestCaseGenerationHandle:
         else:
             meta_dict = {}
 
-        return TestCaseGenerationHandle(
+        return _TestCaseLibraryGetHandle(
             name=op.name,
-            create_time=op.create_time,
-            creator=op.creator,
-            update_time=op.update_time,
-            updater=op.updater,
-            delete_time=op.delete_time,
-            deleter=op.deleter,
             progress=meta_dict.get("progress"),
             progress_message=meta_dict.get("progressMessage"),
             error=op.error,
@@ -129,6 +147,42 @@ class TestCaseGenerationHandle:
         )
 
 
+@dataclasses.dataclass
+class TestCaseLibraryItem:
+    """Represents a single test case library item - test suite."""
+
+    key: str
+    name: str
+    description: str
+    test_suite_url: str
+    test_count: int
+    test_case_count: int
+    evaluates: List[str]
+    categories: List[str]
+
+    @staticmethod
+    def _from_api_items(
+        api_items: List[models.V1PromptLibraryItem],
+    ) -> List["TestCaseLibraryItem"]:
+        return (
+            [
+                TestCaseLibraryItem(
+                    key=api_item.name or "",
+                    name=api_item.display_name or "",
+                    description=api_item.description or "",
+                    test_suite_url=api_item.test_suite_url or "",
+                    test_count=api_item.test_count or 0,
+                    test_case_count=api_item.test_case_count or 0,
+                    evaluates=list(api_item.evaluates) if api_item.evaluates else [],
+                    categories=list(api_item.categories) if api_item.categories else [],
+                )
+                for api_item in api_items
+            ]
+            if api_items
+            else []
+        )
+
+
 @dataclasses.dataclass
 class TestCase:
     """Represents a single test case, which contains tested prompt, expected answer
@@ -193,6 +247,8 @@ class Test:
     create_time: Optional[datetime.datetime] = None
     update_time: Optional[datetime.datetime] = None
     _client: Optional[api.ApiClient] = None
+    _gen_tc_op_name: Optional[str] = None
+    _lib_tc_op_name: Optional[str] = None
 
     def __post_init__(self):
         if self._client:
@@ -272,12 +328,12 @@ class Test:
         base_llm_model: Optional[str] = None,
         generators: Optional[List[TestCaseGenerator]] = None,
         existing_collection: Optional[str] = None,
-    ) -> "TestCaseGenerationHandle":
+    ) -> None:
         """Generates test cases based on the documents of the Test.
 
         Args:
             count (int): Number of test cases to generate (generator may return fewer
-                 prompts).
+                prompts).
             model (str): Model to use for generating the prompts.
             base_llm_model (str): Base LLM model to use for generating the prompts.
             generators (List[TestCaseGenerator]): Methods to use for generation.
@@ -296,28 +352,149 @@ class Test:
 
         res = self._test_api.test_service_generate_test_cases(self.key, req)
 
-        return TestCaseGenerationHandle._from_operation(res)
+        op: models.V1Operation | None = res.operation
+        self._gen_tc_op_name = op.name if op else None
 
     def wait_for_test_case_generation(
-        self,
-        handle: TestCaseGenerationHandle,
-        timeout: Optional[float] = None,
-        verbose: bool = False,
-    ) -> TestCaseGenerationHandle:
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
         """Waits for the test case generation to finish.
 
         Args:
-            handle (TestCaseGenerationHandle): Handle of the test case generation.
             timeout (float): The maximum time to wait in seconds.
             verbose (bool): If True, prints the status of the handle while waiting.
         """
-        if not handle.name:
-            raise ValueError("Test case generation handle is not valid.")
-        elif handle.done:
-            return handle
+        if not self._gen_tc_op_name:
+            raise ValueError(
+                "There is no ongoing test case generation - the operation name is not "
+                "set."
+            )
+
+        if verbose:
+            print(
+                f"Waiting for test case generation to finish ({self._gen_tc_op_name}):"
+            )
+        if self._client:
+            # exponential backoff
+            wait_time = 1.0
+            wait_coef = 1.6
+            wait_max = 8.0
+            wait_total = 0.0
+            timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
+            progress_bar = utils.ProgressBar()
+            while wait_total < timeout:
+                handle = _TestCaseGenerationHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._gen_tc_op_name
+                    )
+                )
+
+                if verbose:
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
+
+                if handle.done:
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Test case generation failed: {handle.error}"
+                        )
+                    return
+
+                wait_time *= wait_coef
+                time.sleep(min(wait_time, wait_max))
+        else:
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
+
+        raise TimeoutError("Waiting timeout has been reached.")
+
+    def list_test_suite_library_items(
+        self,
+        filter_by_categories: Optional[List[str]] = None,
+        filter_by_purposes: Optional[List[str]] = None,
+        filter_by_evaluates: Optional[List[str]] = None,
+        filter_by_origin: Optional[str] = None,
+        filter_by_test_case_count: Optional[int] = None,
+        filter_by_test_count: Optional[int] = None,
+        filter_by_fts: Optional[str] = None,
+    ) -> List[TestCaseLibraryItem]:
+        """Retrieves a list of all available items - suites of tests - in the library.
+
+        Args:
+            filter_by_categories (List[str]): List of categories to filter
+                the library items.
+            filter_by_purposes (List[str]): List of purposes to filter
+                the library items.
+            filter_by_evaluates (List[str]): List of evaluates to filter
+                the library items.
+            filter_by_origin (str): Origin to filter the library items.
+            filter_by_test_case_count (int): Test case count to filter
+                the library items.
+            filter_by_test_count (int): Test count to filter the library items.
+            filter_by_fts (str): FTS to filter the library items - phrase to search for.
+
+        Returns:
+            List[TestCaseLibraryItem]: List of library items.
+        """
+        req = models.TestServiceListTestCaseLibraryItemsRequest(
+            filter_by_categories=filter_by_categories,
+            filter_by_purposes=filter_by_purposes,
+            filter_by_evaluates=filter_by_evaluates,
+            filter_by_origin=filter_by_origin,
+            filter_by_test_case_count=filter_by_test_case_count,
+            filter_by_test_count=filter_by_test_count,
+            filter_by_fts=filter_by_fts,
+        )
+
+        res = self._test_api.test_service_list_test_case_library_items(self.key, req)
+        if res and res.prompt_library_items:
+            return TestCaseLibraryItem._from_api_items(res.prompt_library_items)
+
+        return []
+
+    def add_library_test_cases(
+        self, test_suite_url: str, count: int, test_document_urls: Optional[List[str]]
+    ) -> None:
+        """Sample test cases from the test suite library and add them to the test.
+
+        Args:
+            test_suite_url (str): The URL of the library test suite to get TestCases
+                from (sample).
+            count (int): The number of TestCases to get from the library.
+            test_document_urls (List[str]): The list of target Test corpus
+                document URLs to skip when returning library TestCases corpus.
+        """
+        req = models.TestServiceImportTestCasesFromLibraryRequest(
+            test_suite_url=test_suite_url,
+            count=count,
+            test_document_urls=test_document_urls,
+        )
+
+        res = self._test_api.test_service_import_test_cases_from_library(self.key, req)
+
+        op: models.V1Operation | None = res.operation
+        self._lib_tc_op_name = op.name if op else None
+
+    def wait_for_library_test_case_get(
+        self, timeout: Optional[float] = None, verbose: bool = False
+    ) -> None:
+        """Waits for the library test cases(s) sampling to finish.
+
+        Args:
+            timeout (float): The maximum time to wait in seconds.
+            verbose (bool): If True, prints the status of the handle while waiting.
+        """
+        if not self._lib_tc_op_name:
+            raise ValueError(
+                "There is no ongoing getting of test case(s) from the library - "
+                "the operation name is not set."
+            )
 
         if verbose:
-            print(f"Waiting for test case generation to finish ({handle.name}):")
+            print(
+                f"Waiting for getting library test case(s) operation to finish "
+                f"({self._lib_tc_op_name}):"
+            )
         if self._client:
             # exponential backoff
             wait_time = 1.0
@@ -325,37 +502,30 @@ class Test:
             wait_max = 8.0
             wait_total = 0.0
             timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
-            # progress
-            p_max = 1.0
-            p_msg = ""
+            progress_bar = utils.ProgressBar()
             while wait_total < timeout:
-                handle = TestCaseGenerationHandle._from_operation(
-                    self._operation_api.operation_service_get_operation(handle.name)
+                handle = _TestCaseLibraryGetHandle._from_operation(
+                    self._operation_api.operation_service_get_operation(
+                        self._lib_tc_op_name
+                    )
                 )
 
                 if verbose:
-                    print(" " * len(p_msg), end="\r")
-                    if handle.progress or handle.progress_message:
-                        try:
-                            h_progress = float(str(handle.progress))
-                        except ValueError:
-                            h_progress = 0.0
-                        h_msg = handle.progress_message or "Processing"
-                    else:
-                        h_progress = 0.0
-                        h_msg = "Initializing"
-                    p_progress = int(h_progress / p_max * 100)
-                    p_hashes = p_progress // 5
-                    p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {h_msg}"
-                    print(p_msg, end="\r")
+                    progress_bar.update(handle.progress or 0, handle.progress_message)
 
                 if handle.done:
-                    return handle
+                    if handle.error:
+                        raise RuntimeError(
+                            f"Getting of library test case(s) failed: {handle.error}"
+                        )
+                    return
 
                 wait_time *= wait_coef
                 time.sleep(min(wait_time, wait_max))
         else:
-            raise ValueError("Cannot establish connection to Eval Studio host.")
+            raise ValueError(
+                "Unable to establish a connection to the Eval Studio host."
+            )
 
         raise TimeoutError("Waiting timeout has been reached.")
 
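The tests.py changes retire the public TestCaseGenerationHandle: generate_test_cases now stores the operation name on the Test itself, wait_for_test_case_generation no longer takes a handle, and new methods expose the test-case library. A minimal usage sketch follows, assuming `test` is an existing Test obtained from a connected client; the counts, timeout, and index access are illustrative only, not taken from the diff.

# Hypothetical usage sketch for the 1.0.1 API; `test` is assumed to be a Test
# instance obtained from a connected eval_studio_client session.

# 1.0.0a1: handle = test.generate_test_cases(...); wait_for_test_case_generation(handle)
# 1.0.1: the operation name is kept on the Test, so no handle is passed around.
test.generate_test_cases(count=10)
test.wait_for_test_case_generation(timeout=600, verbose=True)

# Browse the test-case library (all filters are optional) and sample from a suite.
items = test.list_test_suite_library_items()
for item in items:
    print(item.key, item.name, item.test_count, item.test_case_count)

if items:
    test.add_library_test_cases(
        test_suite_url=items[0].test_suite_url,
        count=5,
        test_document_urls=None,
    )
    test.wait_for_library_test_case_get(timeout=600, verbose=True)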
eval_studio_client/utils.py (new file)

@@ -0,0 +1,26 @@
+from typing import Optional
+
+
+class ProgressBar:
+    def __init__(self):
+        self.progress = 0.0
+        self.progress_message = "Initializing"
+        self._progress_max = 1.0
+
+    def update(self, progress: float, message: Optional[str] = None):
+        try:
+            self.progress = float(str(progress))
+        except ValueError:
+            self.progress = 0.0
+
+        if message:
+            self.progress_message = message or ""
+
+        self.print()
+
+    def print(self):
+        print(" " * len(self.progress_message), end="\r")
+        p_progress = int(self.progress / self._progress_max * 100)
+        p_hashes = p_progress // 5
+        p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {self.progress_message}"
+        print(p_msg, end="\r")
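The new utils.ProgressBar consolidates the inline progress-printing logic that the old wait loop carried. It is driven internally by the wait_* methods, but the short sketch below shows the expected inputs, assuming progress in the 0.0-1.0 range; the messages are illustrative.

from eval_studio_client import utils

bar = utils.ProgressBar()
bar.update(0.25, "Generating test cases")  # redraws a 20-slot hash bar at 25% with the message
bar.update(1.0, "Done")                    # full bar, message replaced with "Done"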
{eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.1.dist-info}/METADATA

@@ -1,10 +1,9 @@
 Metadata-Version: 2.3
 Name: eval-studio-client
-Version: 1.0.0a1
+Version: 1.0.1
 Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
 Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
 Author-email: "H2O.ai" <support@h2o.ai>
-License: MIT
 Classifier: Development Status :: 4 - Beta
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.9