eval-studio-client 1.0.0a1__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_studio_client/api/__init__.py +43 -0
- eval_studio_client/api/api/__init__.py +5 -0
- eval_studio_client/api/api/human_calibration_service_api.py +304 -0
- eval_studio_client/api/api/perturbator_service_api.py +268 -1
- eval_studio_client/api/api/prompt_library_service_api.py +669 -0
- eval_studio_client/api/api/test_service_api.py +568 -0
- eval_studio_client/api/api/workflow_edge_service_api.py +296 -0
- eval_studio_client/api/api/workflow_node_service_api.py +1634 -0
- eval_studio_client/api/api/workflow_service_api.py +1609 -0
- eval_studio_client/api/docs/HumanCalibrationServiceApi.md +77 -0
- eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +1 -0
- eval_studio_client/api/docs/PerturbatorServiceApi.md +33 -3
- eval_studio_client/api/docs/PromptGenerationServiceAutoGeneratePromptsRequest.md +2 -1
- eval_studio_client/api/docs/PromptLibraryServiceApi.md +155 -0
- eval_studio_client/api/docs/ProtobufNullValue.md +12 -0
- eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheUpdatedWorkflow.md +44 -0
- eval_studio_client/api/docs/RequiredTheUpdatedWorkflowNode.md +44 -0
- eval_studio_client/api/docs/TestServiceApi.md +140 -0
- eval_studio_client/api/docs/TestServiceGenerateTestCasesRequest.md +1 -0
- eval_studio_client/api/docs/TestServiceImportTestCasesFromLibraryRequest.md +32 -0
- eval_studio_client/api/docs/TestServiceListTestCaseLibraryItemsRequest.md +35 -0
- eval_studio_client/api/docs/TestServicePerturbTestRequest.md +1 -0
- eval_studio_client/api/docs/V1BatchDeleteWorkflowsRequest.md +29 -0
- eval_studio_client/api/docs/V1BatchDeleteWorkflowsResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchGetWorkflowEdgesResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchGetWorkflowNodesResponse.md +29 -0
- eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1CreateWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1DeleteWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1DeleteWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1EstimateThresholdRequest.md +33 -0
- eval_studio_client/api/docs/V1GetWorkflowNodePrerequisitesResponse.md +30 -0
- eval_studio_client/api/docs/V1GetWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1GetWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1ImportEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1ImportTestCasesFromLibraryResponse.md +29 -0
- eval_studio_client/api/docs/V1ImportTestCasesRequest.md +33 -0
- eval_studio_client/api/docs/V1LabeledTestCase.md +31 -0
- eval_studio_client/api/docs/V1ListPromptLibraryItemsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListTestCaseLibraryItemsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListWorkflowsResponse.md +29 -0
- eval_studio_client/api/docs/V1ProcessWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1PromptLibraryItem.md +42 -0
- eval_studio_client/api/docs/V1TestCase.md +1 -0
- eval_studio_client/api/docs/V1TestSuiteEvaluates.md +11 -0
- eval_studio_client/api/docs/V1UpdateWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1UpdateWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1Workflow.md +46 -0
- eval_studio_client/api/docs/V1WorkflowEdge.md +40 -0
- eval_studio_client/api/docs/V1WorkflowEdgeType.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNode.md +46 -0
- eval_studio_client/api/docs/V1WorkflowNodeArtifact.md +40 -0
- eval_studio_client/api/docs/V1WorkflowNodeArtifacts.md +29 -0
- eval_studio_client/api/docs/V1WorkflowNodeAttributes.md +30 -0
- eval_studio_client/api/docs/V1WorkflowNodeStatus.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNodeType.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNodeView.md +12 -0
- eval_studio_client/api/docs/V1WorkflowType.md +12 -0
- eval_studio_client/api/docs/WorkflowEdgeServiceApi.md +76 -0
- eval_studio_client/api/docs/WorkflowNodeServiceApi.md +423 -0
- eval_studio_client/api/docs/WorkflowServiceApi.md +417 -0
- eval_studio_client/api/models/__init__.py +38 -0
- eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +8 -2
- eval_studio_client/api/models/prompt_generation_service_auto_generate_prompts_request.py +5 -3
- eval_studio_client/api/models/protobuf_null_value.py +36 -0
- eval_studio_client/api/models/required_the_test_case_to_update.py +6 -2
- eval_studio_client/api/models/required_the_updated_workflow.py +152 -0
- eval_studio_client/api/models/required_the_updated_workflow_node.py +152 -0
- eval_studio_client/api/models/test_service_generate_test_cases_request.py +4 -2
- eval_studio_client/api/models/test_service_import_test_cases_from_library_request.py +93 -0
- eval_studio_client/api/models/test_service_list_test_case_library_items_request.py +99 -0
- eval_studio_client/api/models/test_service_perturb_test_request.py +4 -2
- eval_studio_client/api/models/v1_batch_delete_workflows_request.py +87 -0
- eval_studio_client/api/models/v1_batch_delete_workflows_response.py +95 -0
- eval_studio_client/api/models/v1_batch_get_workflow_edges_response.py +95 -0
- eval_studio_client/api/models/v1_batch_get_workflow_nodes_response.py +95 -0
- eval_studio_client/api/models/v1_create_evaluation_request.py +7 -2
- eval_studio_client/api/models/v1_create_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_delete_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_delete_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_estimate_threshold_request.py +103 -0
- eval_studio_client/api/models/v1_get_workflow_node_prerequisites_response.py +89 -0
- eval_studio_client/api/models/v1_get_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_get_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_import_evaluation_request.py +7 -2
- eval_studio_client/api/models/v1_import_test_cases_from_library_response.py +91 -0
- eval_studio_client/api/models/v1_import_test_cases_request.py +95 -0
- eval_studio_client/api/models/v1_labeled_test_case.py +91 -0
- eval_studio_client/api/models/v1_list_prompt_library_items_response.py +95 -0
- eval_studio_client/api/models/v1_list_test_case_library_items_response.py +95 -0
- eval_studio_client/api/models/v1_list_workflows_response.py +95 -0
- eval_studio_client/api/models/v1_process_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_prompt_library_item.py +129 -0
- eval_studio_client/api/models/v1_test_case.py +6 -2
- eval_studio_client/api/models/v1_test_suite_evaluates.py +39 -0
- eval_studio_client/api/models/v1_update_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_update_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_workflow.py +156 -0
- eval_studio_client/api/models/v1_workflow_edge.py +123 -0
- eval_studio_client/api/models/v1_workflow_edge_type.py +37 -0
- eval_studio_client/api/models/v1_workflow_node.py +156 -0
- eval_studio_client/api/models/v1_workflow_node_artifact.py +122 -0
- eval_studio_client/api/models/v1_workflow_node_artifacts.py +97 -0
- eval_studio_client/api/models/v1_workflow_node_attributes.py +87 -0
- eval_studio_client/api/models/v1_workflow_node_status.py +40 -0
- eval_studio_client/api/models/v1_workflow_node_type.py +41 -0
- eval_studio_client/api/models/v1_workflow_node_view.py +38 -0
- eval_studio_client/api/models/v1_workflow_type.py +37 -0
- eval_studio_client/api/test/test_human_calibration_service_api.py +38 -0
- eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +20 -2
- eval_studio_client/api/test/test_prompt_generation_service_auto_generate_prompts_request.py +4 -1
- eval_studio_client/api/test/test_prompt_library_service_api.py +43 -0
- eval_studio_client/api/test/test_protobuf_null_value.py +33 -0
- eval_studio_client/api/test/test_required_the_test_case_to_update.py +4 -1
- eval_studio_client/api/test/test_required_the_updated_workflow.py +88 -0
- eval_studio_client/api/test/test_required_the_updated_workflow_node.py +80 -0
- eval_studio_client/api/test/test_test_service_api.py +12 -0
- eval_studio_client/api/test/test_test_service_generate_test_cases_request.py +4 -1
- eval_studio_client/api/test/test_test_service_import_test_cases_from_library_request.py +56 -0
- eval_studio_client/api/test/test_test_service_list_test_case_library_items_request.py +63 -0
- eval_studio_client/api/test/test_test_service_perturb_test_request.py +4 -1
- eval_studio_client/api/test/test_v1_batch_delete_test_cases_response.py +4 -1
- eval_studio_client/api/test/test_v1_batch_delete_workflows_request.py +53 -0
- eval_studio_client/api/test/test_v1_batch_delete_workflows_response.py +92 -0
- eval_studio_client/api/test/test_v1_batch_get_workflow_edges_response.py +64 -0
- eval_studio_client/api/test/test_v1_batch_get_workflow_nodes_response.py +84 -0
- eval_studio_client/api/test/test_v1_create_evaluation_request.py +20 -2
- eval_studio_client/api/test/test_v1_create_test_case_response.py +4 -1
- eval_studio_client/api/test/test_v1_create_workflow_response.py +90 -0
- eval_studio_client/api/test/test_v1_delete_test_case_response.py +4 -1
- eval_studio_client/api/test/test_v1_delete_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_delete_workflow_response.py +90 -0
- eval_studio_client/api/test/test_v1_estimate_threshold_request.py +60 -0
- eval_studio_client/api/test/test_v1_evaluation_test.py +4 -1
- eval_studio_client/api/test/test_v1_find_all_test_cases_by_id_response.py +4 -1
- eval_studio_client/api/test/test_v1_get_test_case_response.py +4 -1
- eval_studio_client/api/test/test_v1_get_workflow_node_prerequisites_response.py +56 -0
- eval_studio_client/api/test/test_v1_get_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_get_workflow_response.py +90 -0
- eval_studio_client/api/test/test_v1_import_evaluation_request.py +16 -1
- eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +71 -0
- eval_studio_client/api/test/test_v1_import_test_cases_request.py +57 -0
- eval_studio_client/api/test/test_v1_labeled_test_case.py +53 -0
- eval_studio_client/api/test/test_v1_list_prompt_library_items_response.py +71 -0
- eval_studio_client/api/test/test_v1_list_test_case_library_items_response.py +71 -0
- eval_studio_client/api/test/test_v1_list_test_cases_response.py +4 -1
- eval_studio_client/api/test/test_v1_list_workflows_response.py +92 -0
- eval_studio_client/api/test/test_v1_process_workflow_node_response.py +71 -0
- eval_studio_client/api/test/test_v1_prompt_library_item.py +68 -0
- eval_studio_client/api/test/test_v1_test_case.py +4 -1
- eval_studio_client/api/test/test_v1_test_suite_evaluates.py +33 -0
- eval_studio_client/api/test/test_v1_update_test_case_response.py +4 -1
- eval_studio_client/api/test/test_v1_update_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_update_workflow_response.py +90 -0
- eval_studio_client/api/test/test_v1_workflow.py +89 -0
- eval_studio_client/api/test/test_v1_workflow_edge.py +61 -0
- eval_studio_client/api/test/test_v1_workflow_edge_type.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node.py +81 -0
- eval_studio_client/api/test/test_v1_workflow_node_artifact.py +61 -0
- eval_studio_client/api/test/test_v1_workflow_node_artifacts.py +64 -0
- eval_studio_client/api/test/test_v1_workflow_node_attributes.py +51 -0
- eval_studio_client/api/test/test_v1_workflow_node_status.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node_type.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node_view.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_type.py +33 -0
- eval_studio_client/api/test/test_workflow_edge_service_api.py +38 -0
- eval_studio_client/api/test/test_workflow_node_service_api.py +73 -0
- eval_studio_client/api/test/test_workflow_service_api.py +73 -0
- eval_studio_client/client.py +7 -0
- eval_studio_client/dashboards.py +66 -18
- eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2665 -794
- eval_studio_client/leaderboards.py +125 -0
- eval_studio_client/models.py +3 -42
- eval_studio_client/test_labs.py +49 -21
- eval_studio_client/tests.py +221 -51
- eval_studio_client/utils.py +26 -0
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.1.dist-info}/METADATA +1 -2
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.1.dist-info}/RECORD +180 -50
- {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.0.1.dist-info}/WHEEL +1 -1
eval_studio_client/tests.py
CHANGED
|
@@ -11,6 +11,7 @@ from typing import Union
|
|
|
11
11
|
from eval_studio_client import api
|
|
12
12
|
from eval_studio_client import documents as d7s
|
|
13
13
|
from eval_studio_client import perturbators as p10s
|
|
14
|
+
from eval_studio_client import utils
|
|
14
15
|
from eval_studio_client.api import models
|
|
15
16
|
|
|
16
17
|
|
|
@@ -85,15 +86,9 @@ class TestCaseGenerator(enum.Enum):
|
|
|
85
86
|
|
|
86
87
|
|
|
87
88
|
@dataclasses.dataclass
|
|
88
|
-
class TestCaseGenerationHandle:
|
|
89
|
+
class _TestCaseGenerationHandle:
|
|
89
90
|
|
|
90
91
|
name: Any | None
|
|
91
|
-
create_time: Optional[datetime.datetime] = None
|
|
92
|
-
creator: Optional[str] = None
|
|
93
|
-
update_time: Optional[datetime.datetime] = None
|
|
94
|
-
updater: Optional[str] = None
|
|
95
|
-
delete_time: Optional[datetime.datetime] = None
|
|
96
|
-
deleter: Optional[str] = None
|
|
97
92
|
progress: Optional[float] = None
|
|
98
93
|
progress_message: Optional[str] = None
|
|
99
94
|
error: Optional[models.RpcStatus] = None
|
|
@@ -102,11 +97,40 @@ class TestCaseGenerationHandle:
|
|
|
102
97
|
@staticmethod
|
|
103
98
|
def _from_operation(
|
|
104
99
|
res: models.V1GenerateTestCasesResponse | models.V1GetOperationResponse,
|
|
105
|
-
) -> "TestCaseGenerationHandle":
|
|
100
|
+
) -> "_TestCaseGenerationHandle":
|
|
106
101
|
"""Converts an API operation to prompt generation handle."""
|
|
107
102
|
op: models.V1Operation | None = res.operation
|
|
108
103
|
if not op:
|
|
109
|
-
return
|
|
104
|
+
return _TestCaseGenerationHandle(name=None)
|
|
105
|
+
|
|
106
|
+
# progress
|
|
107
|
+
if hasattr(op, "metadata") and op.metadata:
|
|
108
|
+
meta_dict = op.metadata.to_dict() or {}
|
|
109
|
+
else:
|
|
110
|
+
meta_dict = {}
|
|
111
|
+
|
|
112
|
+
return _TestCaseGenerationHandle(
|
|
113
|
+
name=op.name,
|
|
114
|
+
progress=meta_dict.get("progress"),
|
|
115
|
+
progress_message=meta_dict.get("progressMessage"),
|
|
116
|
+
error=op.error,
|
|
117
|
+
done=op.done,
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@dataclasses.dataclass
|
|
122
|
+
class _TestCaseLibraryGetHandle(_TestCaseGenerationHandle):
|
|
123
|
+
|
|
124
|
+
@staticmethod
|
|
125
|
+
def _from_operation(
|
|
126
|
+
res: (
|
|
127
|
+
models.V1ImportTestCasesFromLibraryResponse | models.V1GetOperationResponse
|
|
128
|
+
),
|
|
129
|
+
) -> "_TestCaseLibraryGetHandle":
|
|
130
|
+
"""Converts an API operation to prompt library handle."""
|
|
131
|
+
op: models.V1Operation | None = res.operation
|
|
132
|
+
if not op:
|
|
133
|
+
return _TestCaseLibraryGetHandle(name=None)
|
|
110
134
|
|
|
111
135
|
# progress
|
|
112
136
|
if hasattr(op, "metadata") and op.metadata:
|
|
@@ -114,14 +138,8 @@ class TestCaseGenerationHandle:
|
|
|
114
138
|
else:
|
|
115
139
|
meta_dict = {}
|
|
116
140
|
|
|
117
|
-
return
|
|
141
|
+
return _TestCaseLibraryGetHandle(
|
|
118
142
|
name=op.name,
|
|
119
|
-
create_time=op.create_time,
|
|
120
|
-
creator=op.creator,
|
|
121
|
-
update_time=op.update_time,
|
|
122
|
-
updater=op.updater,
|
|
123
|
-
delete_time=op.delete_time,
|
|
124
|
-
deleter=op.deleter,
|
|
125
143
|
progress=meta_dict.get("progress"),
|
|
126
144
|
progress_message=meta_dict.get("progressMessage"),
|
|
127
145
|
error=op.error,
|
|
@@ -129,6 +147,42 @@ class TestCaseGenerationHandle:
|
|
|
129
147
|
)
|
|
130
148
|
|
|
131
149
|
|
|
150
|
+
@dataclasses.dataclass
|
|
151
|
+
class TestCaseLibraryItem:
|
|
152
|
+
"""Represents a single test case library item - test suite."""
|
|
153
|
+
|
|
154
|
+
key: str
|
|
155
|
+
name: str
|
|
156
|
+
description: str
|
|
157
|
+
test_suite_url: str
|
|
158
|
+
test_count: int
|
|
159
|
+
test_case_count: int
|
|
160
|
+
evaluates: List[str]
|
|
161
|
+
categories: List[str]
|
|
162
|
+
|
|
163
|
+
@staticmethod
|
|
164
|
+
def _from_api_items(
|
|
165
|
+
api_items: List[models.V1PromptLibraryItem],
|
|
166
|
+
) -> List["TestCaseLibraryItem"]:
|
|
167
|
+
return (
|
|
168
|
+
[
|
|
169
|
+
TestCaseLibraryItem(
|
|
170
|
+
key=api_item.name or "",
|
|
171
|
+
name=api_item.display_name or "",
|
|
172
|
+
description=api_item.description or "",
|
|
173
|
+
test_suite_url=api_item.test_suite_url or "",
|
|
174
|
+
test_count=api_item.test_count or 0,
|
|
175
|
+
test_case_count=api_item.test_case_count or 0,
|
|
176
|
+
evaluates=list(api_item.evaluates) if api_item.evaluates else [],
|
|
177
|
+
categories=list(api_item.categories) if api_item.categories else [],
|
|
178
|
+
)
|
|
179
|
+
for api_item in api_items
|
|
180
|
+
]
|
|
181
|
+
if api_items
|
|
182
|
+
else []
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
|
|
132
186
|
@dataclasses.dataclass
|
|
133
187
|
class TestCase:
|
|
134
188
|
"""Represents a single test case, which contains tested prompt, expected answer
|
|
@@ -193,6 +247,8 @@ class Test:
|
|
|
193
247
|
create_time: Optional[datetime.datetime] = None
|
|
194
248
|
update_time: Optional[datetime.datetime] = None
|
|
195
249
|
_client: Optional[api.ApiClient] = None
|
|
250
|
+
_gen_tc_op_name: Optional[str] = None
|
|
251
|
+
_lib_tc_op_name: Optional[str] = None
|
|
196
252
|
|
|
197
253
|
def __post_init__(self):
|
|
198
254
|
if self._client:
|
|
@@ -272,12 +328,12 @@ class Test:
|
|
|
272
328
|
base_llm_model: Optional[str] = None,
|
|
273
329
|
generators: Optional[List[TestCaseGenerator]] = None,
|
|
274
330
|
existing_collection: Optional[str] = None,
|
|
275
|
-
) ->
|
|
331
|
+
) -> None:
|
|
276
332
|
"""Generates test cases based on the documents of the Test.
|
|
277
333
|
|
|
278
334
|
Args:
|
|
279
335
|
count (int): Number of test cases to generate (generator may return fewer
|
|
280
|
-
|
|
336
|
+
prompts).
|
|
281
337
|
model (str): Model to use for generating the prompts.
|
|
282
338
|
base_llm_model (str): Base LLM model to use for generating the prompts.
|
|
283
339
|
generators (List[TestCaseGenerator]): Methods to use for generation.
|
|
@@ -296,28 +352,149 @@ class Test:
|
|
|
296
352
|
|
|
297
353
|
res = self._test_api.test_service_generate_test_cases(self.key, req)
|
|
298
354
|
|
|
299
|
-
|
|
355
|
+
op: models.V1Operation | None = res.operation
|
|
356
|
+
self._gen_tc_op_name = op.name if op else None
|
|
300
357
|
|
|
301
358
|
def wait_for_test_case_generation(
|
|
302
|
-
self,
|
|
303
|
-
|
|
304
|
-
timeout: Optional[float] = None,
|
|
305
|
-
verbose: bool = False,
|
|
306
|
-
) -> TestCaseGenerationHandle:
|
|
359
|
+
self, timeout: Optional[float] = None, verbose: bool = False
|
|
360
|
+
) -> None:
|
|
307
361
|
"""Waits for the test case generation to finish.
|
|
308
362
|
|
|
309
363
|
Args:
|
|
310
|
-
handle (TestCaseGenerationHandle): Handle of the test case generation.
|
|
311
364
|
timeout (float): The maximum time to wait in seconds.
|
|
312
365
|
verbose (bool): If True, prints the status of the handle while waiting.
|
|
313
366
|
"""
|
|
314
|
-
if not
|
|
315
|
-
raise ValueError(
|
|
316
|
-
|
|
317
|
-
|
|
367
|
+
if not self._gen_tc_op_name:
|
|
368
|
+
raise ValueError(
|
|
369
|
+
"There is no ongoing test case generation - the operation name is not "
|
|
370
|
+
"set."
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
if verbose:
|
|
374
|
+
print(
|
|
375
|
+
f"Waiting for test case generation to finish ({self._gen_tc_op_name}):"
|
|
376
|
+
)
|
|
377
|
+
if self._client:
|
|
378
|
+
# exponential backoff
|
|
379
|
+
wait_time = 1.0
|
|
380
|
+
wait_coef = 1.6
|
|
381
|
+
wait_max = 8.0
|
|
382
|
+
wait_total = 0.0
|
|
383
|
+
timeout = timeout or float(2 * 24 * 60 * 60) # 2 days
|
|
384
|
+
progress_bar = utils.ProgressBar()
|
|
385
|
+
while wait_total < timeout:
|
|
386
|
+
handle = _TestCaseGenerationHandle._from_operation(
|
|
387
|
+
self._operation_api.operation_service_get_operation(
|
|
388
|
+
self._gen_tc_op_name
|
|
389
|
+
)
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
if verbose:
|
|
393
|
+
progress_bar.update(handle.progress or 0, handle.progress_message)
|
|
394
|
+
|
|
395
|
+
if handle.done:
|
|
396
|
+
if handle.error:
|
|
397
|
+
raise RuntimeError(
|
|
398
|
+
f"Test case generation failed: {handle.error}"
|
|
399
|
+
)
|
|
400
|
+
return
|
|
401
|
+
|
|
402
|
+
wait_time *= wait_coef
|
|
403
|
+
time.sleep(min(wait_time, wait_max))
|
|
404
|
+
else:
|
|
405
|
+
raise ValueError(
|
|
406
|
+
"Unable to establish a connection to the Eval Studio host."
|
|
407
|
+
)
|
|
408
|
+
|
|
409
|
+
raise TimeoutError("Waiting timeout has been reached.")
|
|
410
|
+
|
|
411
|
+
def list_test_suite_library_items(
|
|
412
|
+
self,
|
|
413
|
+
filter_by_categories: Optional[List[str]] = None,
|
|
414
|
+
filter_by_purposes: Optional[List[str]] = None,
|
|
415
|
+
filter_by_evaluates: Optional[List[str]] = None,
|
|
416
|
+
filter_by_origin: Optional[str] = None,
|
|
417
|
+
filter_by_test_case_count: Optional[int] = None,
|
|
418
|
+
filter_by_test_count: Optional[int] = None,
|
|
419
|
+
filter_by_fts: Optional[str] = None,
|
|
420
|
+
) -> List[TestCaseLibraryItem]:
|
|
421
|
+
"""Retrieves a list of all available items - suites of tests - in the library.
|
|
422
|
+
|
|
423
|
+
Args:
|
|
424
|
+
filter_by_categories (List[str]): List of categories to filter
|
|
425
|
+
the library items.
|
|
426
|
+
filter_by_purposes (List[str]): List of purposes to filter
|
|
427
|
+
the library items.
|
|
428
|
+
filter_by_evaluates (List[str]): List of evaluates to filter
|
|
429
|
+
the library items.
|
|
430
|
+
filter_by_origin (str): Origin to filter the library items.
|
|
431
|
+
filter_by_test_case_count (int): Test case count to filter
|
|
432
|
+
the library items.
|
|
433
|
+
filter_by_test_count (int): Test count to filter the library items.
|
|
434
|
+
filter_by_fts (str): FTS to filter the library items - phrase to search for.
|
|
435
|
+
|
|
436
|
+
Returns:
|
|
437
|
+
List[TestCaseLibraryItem]: List of library items.
|
|
438
|
+
"""
|
|
439
|
+
req = models.TestServiceListTestCaseLibraryItemsRequest(
|
|
440
|
+
filter_by_categories=filter_by_categories,
|
|
441
|
+
filter_by_purposes=filter_by_purposes,
|
|
442
|
+
filter_by_evaluates=filter_by_evaluates,
|
|
443
|
+
filter_by_origin=filter_by_origin,
|
|
444
|
+
filter_by_test_case_count=filter_by_test_case_count,
|
|
445
|
+
filter_by_test_count=filter_by_test_count,
|
|
446
|
+
filter_by_fts=filter_by_fts,
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
res = self._test_api.test_service_list_test_case_library_items(self.key, req)
|
|
450
|
+
if res and res.prompt_library_items:
|
|
451
|
+
return TestCaseLibraryItem._from_api_items(res.prompt_library_items)
|
|
452
|
+
|
|
453
|
+
return []
|
|
454
|
+
|
|
455
|
+
def add_library_test_cases(
|
|
456
|
+
self, test_suite_url: str, count: int, test_document_urls: Optional[List[str]]
|
|
457
|
+
) -> None:
|
|
458
|
+
"""Sample test cases from the test suite library and add them to the test.
|
|
459
|
+
|
|
460
|
+
Args:
|
|
461
|
+
test_suite_url (str): The URL of the library test suite to get TestCases
|
|
462
|
+
from (sample).
|
|
463
|
+
count (int): The number of TestCases to get from the library.
|
|
464
|
+
test_document_urls (List[str]): The list of target Test corpus
|
|
465
|
+
document URLs to skip when returning library TestCases corpus.
|
|
466
|
+
"""
|
|
467
|
+
req = models.TestServiceImportTestCasesFromLibraryRequest(
|
|
468
|
+
test_suite_url=test_suite_url,
|
|
469
|
+
count=count,
|
|
470
|
+
test_document_urls=test_document_urls,
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
res = self._test_api.test_service_import_test_cases_from_library(self.key, req)
|
|
474
|
+
|
|
475
|
+
op: models.V1Operation | None = res.operation
|
|
476
|
+
self._lib_tc_op_name = op.name if op else None
|
|
477
|
+
|
|
478
|
+
def wait_for_library_test_case_get(
|
|
479
|
+
self, timeout: Optional[float] = None, verbose: bool = False
|
|
480
|
+
) -> None:
|
|
481
|
+
"""Waits for the library test cases(s) sampling to finish.
|
|
482
|
+
|
|
483
|
+
Args:
|
|
484
|
+
timeout (float): The maximum time to wait in seconds.
|
|
485
|
+
verbose (bool): If True, prints the status of the handle while waiting.
|
|
486
|
+
"""
|
|
487
|
+
if not self._lib_tc_op_name:
|
|
488
|
+
raise ValueError(
|
|
489
|
+
"There is no ongoing getting of test case(s) from the library - "
|
|
490
|
+
"the operation name is not set."
|
|
491
|
+
)
|
|
318
492
|
|
|
319
493
|
if verbose:
|
|
320
|
-
print(
|
|
494
|
+
print(
|
|
495
|
+
f"Waiting for getting library test case(s) operation to finish "
|
|
496
|
+
f"({self._lib_tc_op_name}):"
|
|
497
|
+
)
|
|
321
498
|
if self._client:
|
|
322
499
|
# exponential backoff
|
|
323
500
|
wait_time = 1.0
|
|
@@ -325,37 +502,30 @@ class Test:
|
|
|
325
502
|
wait_max = 8.0
|
|
326
503
|
wait_total = 0.0
|
|
327
504
|
timeout = timeout or float(2 * 24 * 60 * 60) # 2 days
|
|
328
|
-
|
|
329
|
-
p_max = 1.0
|
|
330
|
-
p_msg = ""
|
|
505
|
+
progress_bar = utils.ProgressBar()
|
|
331
506
|
while wait_total < timeout:
|
|
332
|
-
handle =
|
|
333
|
-
self._operation_api.operation_service_get_operation(
|
|
507
|
+
handle = _TestCaseLibraryGetHandle._from_operation(
|
|
508
|
+
self._operation_api.operation_service_get_operation(
|
|
509
|
+
self._lib_tc_op_name
|
|
510
|
+
)
|
|
334
511
|
)
|
|
335
512
|
|
|
336
513
|
if verbose:
|
|
337
|
-
|
|
338
|
-
if handle.progress or handle.progress_message:
|
|
339
|
-
try:
|
|
340
|
-
h_progress = float(str(handle.progress))
|
|
341
|
-
except ValueError:
|
|
342
|
-
h_progress = 0.0
|
|
343
|
-
h_msg = handle.progress_message or "Processing"
|
|
344
|
-
else:
|
|
345
|
-
h_progress = 0.0
|
|
346
|
-
h_msg = "Initializing"
|
|
347
|
-
p_progress = int(h_progress / p_max * 100)
|
|
348
|
-
p_hashes = p_progress // 5
|
|
349
|
-
p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {h_msg}"
|
|
350
|
-
print(p_msg, end="\r")
|
|
514
|
+
progress_bar.update(handle.progress or 0, handle.progress_message)
|
|
351
515
|
|
|
352
516
|
if handle.done:
|
|
353
|
-
|
|
517
|
+
if handle.error:
|
|
518
|
+
raise RuntimeError(
|
|
519
|
+
f"Getting of library test case(s) failed: {handle.error}"
|
|
520
|
+
)
|
|
521
|
+
return
|
|
354
522
|
|
|
355
523
|
wait_time *= wait_coef
|
|
356
524
|
time.sleep(min(wait_time, wait_max))
|
|
357
525
|
else:
|
|
358
|
-
raise ValueError(
|
|
526
|
+
raise ValueError(
|
|
527
|
+
"Unable to establish a connection to the Eval Studio host."
|
|
528
|
+
)
|
|
359
529
|
|
|
360
530
|
raise TimeoutError("Waiting timeout has been reached.")
|
|
361
531
|
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ProgressBar:
|
|
5
|
+
def __init__(self):
|
|
6
|
+
self.progress = 0.0
|
|
7
|
+
self.progress_message = "Initializing"
|
|
8
|
+
self._progress_max = 1.0
|
|
9
|
+
|
|
10
|
+
def update(self, progress: float, message: Optional[str] = None):
|
|
11
|
+
try:
|
|
12
|
+
self.progress = float(str(progress))
|
|
13
|
+
except ValueError:
|
|
14
|
+
self.progress = 0.0
|
|
15
|
+
|
|
16
|
+
if message:
|
|
17
|
+
self.progress_message = message or ""
|
|
18
|
+
|
|
19
|
+
self.print()
|
|
20
|
+
|
|
21
|
+
def print(self):
|
|
22
|
+
print(" " * len(self.progress_message), end="\r")
|
|
23
|
+
p_progress = int(self.progress / self._progress_max * 100)
|
|
24
|
+
p_hashes = p_progress // 5
|
|
25
|
+
p_msg = f" {p_progress:>3}% |{'#' * p_hashes:<20}| {self.progress_message}"
|
|
26
|
+
print(p_msg, end="\r")
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: eval-studio-client
|
|
3
|
-
Version: 1.0.0a1
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
|
|
5
5
|
Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
|
|
6
6
|
Author-email: "H2O.ai" <support@h2o.ai>
|
|
7
|
-
License: MIT
|
|
8
7
|
Classifier: Development Status :: 4 - Beta
|
|
9
8
|
Classifier: Programming Language :: Python
|
|
10
9
|
Classifier: Programming Language :: Python :: 3.9
|