eval-studio-client 1.0.3a1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_studio_client/api/__init__.py +83 -1
- eval_studio_client/api/api/__init__.py +8 -0
- eval_studio_client/api/api/adversarial_inputs_service_api.py +321 -0
- eval_studio_client/api/api/dashboard_service_api.py +18 -1
- eval_studio_client/api/api/document_service_api.py +1 -1
- eval_studio_client/api/api/evaluation_service_api.py +1 -1
- eval_studio_client/api/api/evaluator_service_api.py +1 -1
- eval_studio_client/api/api/human_calibration_service_api.py +304 -0
- eval_studio_client/api/api/info_service_api.py +1 -1
- eval_studio_client/api/api/leaderboard_report_service_api.py +292 -0
- eval_studio_client/api/api/leaderboard_service_api.py +17 -17
- eval_studio_client/api/api/model_service_api.py +17 -17
- eval_studio_client/api/api/operation_progress_service_api.py +1 -1
- eval_studio_client/api/api/operation_service_api.py +272 -17
- eval_studio_client/api/api/perturbation_service_api.py +1 -1
- eval_studio_client/api/api/perturbator_service_api.py +285 -18
- eval_studio_client/api/api/prompt_generation_service_api.py +1 -1
- eval_studio_client/api/api/prompt_library_service_api.py +669 -0
- eval_studio_client/api/api/test_case_relationship_service_api.py +292 -0
- eval_studio_client/api/api/test_case_service_api.py +17 -17
- eval_studio_client/api/api/test_class_service_api.py +17 -17
- eval_studio_client/api/api/test_lab_service_api.py +1 -1
- eval_studio_client/api/api/test_service_api.py +1272 -102
- eval_studio_client/api/api/who_am_i_service_api.py +1 -1
- eval_studio_client/api/api/workflow_edge_service_api.py +835 -0
- eval_studio_client/api/api/workflow_node_service_api.py +2431 -0
- eval_studio_client/api/api/workflow_service_api.py +2403 -0
- eval_studio_client/api/api_client.py +1 -1
- eval_studio_client/api/configuration.py +1 -1
- eval_studio_client/api/docs/AdversarialInputsServiceApi.md +78 -0
- eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +45 -0
- eval_studio_client/api/docs/DashboardServiceApi.md +4 -2
- eval_studio_client/api/docs/HumanCalibrationServiceApi.md +77 -0
- eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -0
- eval_studio_client/api/docs/LeaderboardServiceApi.md +5 -5
- eval_studio_client/api/docs/ModelServiceApi.md +5 -5
- eval_studio_client/api/docs/OperationServiceApi.md +72 -5
- eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +1 -0
- eval_studio_client/api/docs/PerturbatorServiceApi.md +38 -8
- eval_studio_client/api/docs/PromptGenerationServiceAutoGeneratePromptsRequest.md +4 -2
- eval_studio_client/api/docs/PromptLibraryServiceApi.md +155 -0
- eval_studio_client/api/docs/ProtobufNullValue.md +12 -0
- eval_studio_client/api/docs/RequiredTheDashboardToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +3 -0
- eval_studio_client/api/docs/RequiredTheTestToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheUpdatedWorkflow.md +47 -0
- eval_studio_client/api/docs/RequiredTheUpdatedWorkflowNode.md +44 -0
- eval_studio_client/api/docs/TestCaseRelationshipServiceApi.md +75 -0
- eval_studio_client/api/docs/TestCaseServiceApi.md +5 -5
- eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
- eval_studio_client/api/docs/TestServiceApi.md +293 -9
- eval_studio_client/api/docs/TestServiceCloneTestRequest.md +30 -0
- eval_studio_client/api/docs/TestServiceGenerateTestCasesRequest.md +3 -1
- eval_studio_client/api/docs/TestServiceImportTestCasesFromLibraryRequest.md +32 -0
- eval_studio_client/api/docs/TestServiceListTestCaseLibraryItemsRequest.md +35 -0
- eval_studio_client/api/docs/TestServicePerturbTestInPlaceRequest.md +30 -0
- eval_studio_client/api/docs/TestServicePerturbTestRequest.md +1 -0
- eval_studio_client/api/docs/V1AbortOperationResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchDeleteWorkflowsRequest.md +29 -0
- eval_studio_client/api/docs/V1BatchDeleteWorkflowsResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchGetWorkflowEdgesResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchGetWorkflowNodesResponse.md +29 -0
- eval_studio_client/api/docs/V1CloneTestResponse.md +29 -0
- eval_studio_client/api/docs/V1CloneWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1Context.md +37 -0
- eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1CreateWorkflowEdgeResponse.md +29 -0
- eval_studio_client/api/docs/V1CreateWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1CreateWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1Dashboard.md +1 -0
- eval_studio_client/api/docs/V1DashboardType.md +12 -0
- eval_studio_client/api/docs/V1DeleteWorkflowEdgeResponse.md +29 -0
- eval_studio_client/api/docs/V1DeleteWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1DeleteWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1DependencyList.md +30 -0
- eval_studio_client/api/docs/V1EstimateThresholdRequest.md +33 -0
- eval_studio_client/api/docs/V1Evaluator.md +2 -0
- eval_studio_client/api/docs/V1GetGuardrailsConfigurationResponse.md +29 -0
- eval_studio_client/api/docs/V1GetLeaderboardReportResponse.md +29 -0
- eval_studio_client/api/docs/V1GetWorkflowNodePrerequisitesResponse.md +30 -0
- eval_studio_client/api/docs/V1GetWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1GetWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1ImportEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1ImportTestCasesFromLibraryResponse.md +29 -0
- eval_studio_client/api/docs/V1ImportTestCasesRequest.md +33 -0
- eval_studio_client/api/docs/V1Info.md +3 -0
- eval_studio_client/api/docs/V1InitWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1LabeledTestCase.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardReport.md +32 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputData.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardReportEvaluator.md +42 -0
- eval_studio_client/api/docs/V1LeaderboardReportEvaluatorParameter.md +38 -0
- eval_studio_client/api/docs/V1LeaderboardReportExplanation.md +34 -0
- eval_studio_client/api/docs/V1LeaderboardReportMetricsMetaEntry.md +41 -0
- eval_studio_client/api/docs/V1LeaderboardReportModel.md +37 -0
- eval_studio_client/api/docs/V1LeaderboardReportResult.md +45 -0
- eval_studio_client/api/docs/V1LeaderboardReportResultRelationship.md +32 -0
- eval_studio_client/api/docs/V1ListPromptLibraryItemsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListTestCaseLibraryItemsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListTestCaseRelationshipsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListWorkflowDependenciesResponse.md +30 -0
- eval_studio_client/api/docs/V1ListWorkflowsResponse.md +29 -0
- eval_studio_client/api/docs/V1MetricScore.md +31 -0
- eval_studio_client/api/docs/V1MetricScores.md +29 -0
- eval_studio_client/api/docs/V1PerturbTestInPlaceResponse.md +29 -0
- eval_studio_client/api/docs/V1ProcessWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1PromptLibraryItem.md +42 -0
- eval_studio_client/api/docs/V1RepeatedContext.md +29 -0
- eval_studio_client/api/docs/V1RepeatedString.md +29 -0
- eval_studio_client/api/docs/V1ResetWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1Test.md +1 -0
- eval_studio_client/api/docs/V1TestCase.md +3 -0
- eval_studio_client/api/docs/V1TestSuiteEvaluates.md +11 -0
- eval_studio_client/api/docs/V1TestType.md +12 -0
- eval_studio_client/api/docs/V1UpdateWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1UpdateWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1Workflow.md +49 -0
- eval_studio_client/api/docs/V1WorkflowDependency.md +30 -0
- eval_studio_client/api/docs/V1WorkflowEdge.md +40 -0
- eval_studio_client/api/docs/V1WorkflowEdgeType.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNode.md +46 -0
- eval_studio_client/api/docs/V1WorkflowNodeArtifact.md +41 -0
- eval_studio_client/api/docs/V1WorkflowNodeArtifacts.md +29 -0
- eval_studio_client/api/docs/V1WorkflowNodeAttributes.md +30 -0
- eval_studio_client/api/docs/V1WorkflowNodeStatus.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNodeType.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNodeView.md +12 -0
- eval_studio_client/api/docs/V1WorkflowType.md +12 -0
- eval_studio_client/api/docs/WorkflowEdgeServiceApi.md +215 -0
- eval_studio_client/api/docs/WorkflowNodeServiceApi.md +632 -0
- eval_studio_client/api/docs/WorkflowServiceApi.md +623 -0
- eval_studio_client/api/docs/WorkflowServiceCloneWorkflowRequest.md +33 -0
- eval_studio_client/api/exceptions.py +1 -1
- eval_studio_client/api/models/__init__.py +75 -1
- eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +143 -0
- eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +9 -3
- eval_studio_client/api/models/prompt_generation_service_auto_generate_prompts_request.py +18 -6
- eval_studio_client/api/models/protobuf_any.py +1 -1
- eval_studio_client/api/models/protobuf_null_value.py +36 -0
- eval_studio_client/api/models/required_the_dashboard_to_update.py +6 -3
- eval_studio_client/api/models/required_the_document_to_update.py +1 -1
- eval_studio_client/api/models/required_the_leaderboard_to_update.py +1 -1
- eval_studio_client/api/models/required_the_model_to_update.py +1 -1
- eval_studio_client/api/models/required_the_operation_to_finalize.py +1 -1
- eval_studio_client/api/models/required_the_operation_to_update.py +1 -1
- eval_studio_client/api/models/required_the_test_case_to_update.py +14 -3
- eval_studio_client/api/models/required_the_test_to_update.py +6 -3
- eval_studio_client/api/models/required_the_updated_workflow.py +160 -0
- eval_studio_client/api/models/required_the_updated_workflow_node.py +152 -0
- eval_studio_client/api/models/rpc_status.py +1 -1
- eval_studio_client/api/models/test_case_service_batch_delete_test_cases_request.py +1 -1
- eval_studio_client/api/models/test_service_clone_test_request.py +89 -0
- eval_studio_client/api/models/test_service_generate_test_cases_request.py +16 -4
- eval_studio_client/api/models/test_service_import_test_cases_from_library_request.py +93 -0
- eval_studio_client/api/models/test_service_list_test_case_library_items_request.py +99 -0
- eval_studio_client/api/models/test_service_perturb_test_in_place_request.py +97 -0
- eval_studio_client/api/models/test_service_perturb_test_request.py +5 -3
- eval_studio_client/api/models/v1_abort_operation_response.py +91 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_request.py +1 -1
- eval_studio_client/api/models/v1_batch_create_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_dashboards_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_documents_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_documents_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_evaluators_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_evaluators_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_leaderboards_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_models_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_models_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_test_cases_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_tests_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_tests_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_workflows_request.py +87 -0
- eval_studio_client/api/models/v1_batch_delete_workflows_response.py +95 -0
- eval_studio_client/api/models/v1_batch_get_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_documents_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_models_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_operations_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_tests_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_workflow_edges_response.py +95 -0
- eval_studio_client/api/models/v1_batch_get_workflow_nodes_response.py +95 -0
- eval_studio_client/api/models/v1_batch_import_leaderboard_request.py +1 -1
- eval_studio_client/api/models/v1_batch_import_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_batch_import_tests_request.py +1 -1
- eval_studio_client/api/models/v1_batch_import_tests_response.py +1 -1
- eval_studio_client/api/models/v1_check_base_models_response.py +1 -1
- eval_studio_client/api/models/v1_clone_test_response.py +91 -0
- eval_studio_client/api/models/v1_clone_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_collection_info.py +1 -1
- eval_studio_client/api/models/v1_context.py +103 -0
- eval_studio_client/api/models/v1_create_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_create_document_response.py +1 -1
- eval_studio_client/api/models/v1_create_evaluation_request.py +8 -3
- eval_studio_client/api/models/v1_create_evaluator_response.py +1 -1
- eval_studio_client/api/models/v1_create_leaderboard_request.py +1 -1
- eval_studio_client/api/models/v1_create_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_create_leaderboard_without_cache_response.py +1 -1
- eval_studio_client/api/models/v1_create_model_response.py +1 -1
- eval_studio_client/api/models/v1_create_perturbation_response.py +1 -1
- eval_studio_client/api/models/v1_create_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_create_test_lab_response.py +1 -1
- eval_studio_client/api/models/v1_create_test_response.py +1 -1
- eval_studio_client/api/models/v1_create_workflow_edge_response.py +91 -0
- eval_studio_client/api/models/v1_create_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_create_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_dashboard.py +6 -3
- eval_studio_client/api/models/v1_dashboard_status.py +1 -1
- eval_studio_client/api/models/v1_dashboard_type.py +38 -0
- eval_studio_client/api/models/v1_delete_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_delete_document_response.py +1 -1
- eval_studio_client/api/models/v1_delete_evaluator_response.py +1 -1
- eval_studio_client/api/models/v1_delete_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_delete_model_response.py +1 -1
- eval_studio_client/api/models/v1_delete_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_delete_test_response.py +1 -1
- eval_studio_client/api/models/v1_delete_workflow_edge_response.py +91 -0
- eval_studio_client/api/models/v1_delete_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_delete_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_dependency_list.py +97 -0
- eval_studio_client/api/models/v1_document.py +1 -1
- eval_studio_client/api/models/v1_estimate_threshold_request.py +103 -0
- eval_studio_client/api/models/v1_evaluation_test.py +1 -1
- eval_studio_client/api/models/v1_evaluator.py +12 -4
- eval_studio_client/api/models/v1_evaluator_param_type.py +1 -1
- eval_studio_client/api/models/v1_evaluator_parameter.py +1 -1
- eval_studio_client/api/models/v1_evaluator_view.py +1 -1
- eval_studio_client/api/models/v1_finalize_operation_response.py +1 -1
- eval_studio_client/api/models/v1_find_all_test_cases_by_id_response.py +1 -1
- eval_studio_client/api/models/v1_find_test_lab_response.py +1 -1
- eval_studio_client/api/models/v1_generate_test_cases_response.py +1 -1
- eval_studio_client/api/models/v1_get_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_get_document_response.py +1 -1
- eval_studio_client/api/models/v1_get_evaluator_response.py +1 -1
- eval_studio_client/api/models/v1_get_guardrails_configuration_response.py +87 -0
- eval_studio_client/api/models/v1_get_info_response.py +1 -1
- eval_studio_client/api/models/v1_get_leaderboard_report_response.py +91 -0
- eval_studio_client/api/models/v1_get_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_get_model_response.py +1 -1
- eval_studio_client/api/models/v1_get_operation_progress_by_parent_response.py +1 -1
- eval_studio_client/api/models/v1_get_operation_response.py +1 -1
- eval_studio_client/api/models/v1_get_perturbator_response.py +1 -1
- eval_studio_client/api/models/v1_get_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_get_test_class_response.py +1 -1
- eval_studio_client/api/models/v1_get_test_response.py +1 -1
- eval_studio_client/api/models/v1_get_workflow_node_prerequisites_response.py +89 -0
- eval_studio_client/api/models/v1_get_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_get_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_import_evaluation_request.py +8 -3
- eval_studio_client/api/models/v1_import_leaderboard_request.py +1 -1
- eval_studio_client/api/models/v1_import_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_import_test_cases_from_library_response.py +91 -0
- eval_studio_client/api/models/v1_import_test_cases_request.py +95 -0
- eval_studio_client/api/models/v1_info.py +10 -4
- eval_studio_client/api/models/v1_init_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_insight.py +1 -1
- eval_studio_client/api/models/v1_labeled_test_case.py +91 -0
- eval_studio_client/api/models/v1_leaderboard.py +1 -1
- eval_studio_client/api/models/v1_leaderboard_report.py +115 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_data.py +93 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +101 -0
- eval_studio_client/api/models/v1_leaderboard_report_evaluator.py +155 -0
- eval_studio_client/api/models/v1_leaderboard_report_evaluator_parameter.py +109 -0
- eval_studio_client/api/models/v1_leaderboard_report_explanation.py +103 -0
- eval_studio_client/api/models/v1_leaderboard_report_metrics_meta_entry.py +129 -0
- eval_studio_client/api/models/v1_leaderboard_report_model.py +113 -0
- eval_studio_client/api/models/v1_leaderboard_report_result.py +175 -0
- eval_studio_client/api/models/v1_leaderboard_report_result_relationship.py +97 -0
- eval_studio_client/api/models/v1_leaderboard_status.py +1 -1
- eval_studio_client/api/models/v1_leaderboard_type.py +1 -1
- eval_studio_client/api/models/v1_leaderboard_view.py +1 -1
- eval_studio_client/api/models/v1_list_base_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_documents_response.py +1 -1
- eval_studio_client/api/models/v1_list_evaluators_response.py +1 -1
- eval_studio_client/api/models/v1_list_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_llm_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_model_collections_response.py +1 -1
- eval_studio_client/api/models/v1_list_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_tests_response.py +1 -1
- eval_studio_client/api/models/v1_list_operations_response.py +1 -1
- eval_studio_client/api/models/v1_list_perturbators_response.py +1 -1
- eval_studio_client/api/models/v1_list_prompt_library_items_response.py +95 -0
- eval_studio_client/api/models/v1_list_rag_collections_response.py +1 -1
- eval_studio_client/api/models/v1_list_test_case_library_items_response.py +95 -0
- eval_studio_client/api/models/v1_list_test_case_relationships_response.py +95 -0
- eval_studio_client/api/models/v1_list_test_cases_response.py +1 -1
- eval_studio_client/api/models/v1_list_test_classes_response.py +1 -1
- eval_studio_client/api/models/v1_list_tests_response.py +1 -1
- eval_studio_client/api/models/v1_list_workflow_dependencies_response.py +105 -0
- eval_studio_client/api/models/v1_list_workflows_response.py +95 -0
- eval_studio_client/api/models/v1_metric_score.py +89 -0
- eval_studio_client/api/models/v1_metric_scores.py +95 -0
- eval_studio_client/api/models/v1_model.py +1 -1
- eval_studio_client/api/models/v1_model_type.py +1 -1
- eval_studio_client/api/models/v1_operation.py +1 -1
- eval_studio_client/api/models/v1_operation_progress.py +1 -1
- eval_studio_client/api/models/v1_perturb_test_in_place_response.py +91 -0
- eval_studio_client/api/models/v1_perturb_test_response.py +1 -1
- eval_studio_client/api/models/v1_perturbator.py +1 -1
- eval_studio_client/api/models/v1_perturbator_configuration.py +1 -1
- eval_studio_client/api/models/v1_perturbator_intensity.py +1 -1
- eval_studio_client/api/models/v1_problem_and_action.py +1 -1
- eval_studio_client/api/models/v1_process_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_prompt_library_item.py +129 -0
- eval_studio_client/api/models/v1_repeated_context.py +95 -0
- eval_studio_client/api/models/v1_repeated_string.py +87 -0
- eval_studio_client/api/models/v1_reset_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_test.py +6 -3
- eval_studio_client/api/models/v1_test_case.py +14 -3
- eval_studio_client/api/models/v1_test_case_relationship.py +1 -1
- eval_studio_client/api/models/v1_test_cases_generator.py +1 -1
- eval_studio_client/api/models/v1_test_class.py +1 -1
- eval_studio_client/api/models/v1_test_class_type.py +1 -1
- eval_studio_client/api/models/v1_test_lab.py +1 -1
- eval_studio_client/api/models/v1_test_suite_evaluates.py +39 -0
- eval_studio_client/api/models/v1_test_type.py +38 -0
- eval_studio_client/api/models/v1_update_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_update_document_response.py +1 -1
- eval_studio_client/api/models/v1_update_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_update_model_response.py +1 -1
- eval_studio_client/api/models/v1_update_operation_response.py +1 -1
- eval_studio_client/api/models/v1_update_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_update_test_response.py +1 -1
- eval_studio_client/api/models/v1_update_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_update_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_who_am_i_response.py +1 -1
- eval_studio_client/api/models/v1_workflow.py +164 -0
- eval_studio_client/api/models/v1_workflow_dependency.py +89 -0
- eval_studio_client/api/models/v1_workflow_edge.py +123 -0
- eval_studio_client/api/models/v1_workflow_edge_type.py +38 -0
- eval_studio_client/api/models/v1_workflow_node.py +156 -0
- eval_studio_client/api/models/v1_workflow_node_artifact.py +126 -0
- eval_studio_client/api/models/v1_workflow_node_artifacts.py +97 -0
- eval_studio_client/api/models/v1_workflow_node_attributes.py +87 -0
- eval_studio_client/api/models/v1_workflow_node_status.py +40 -0
- eval_studio_client/api/models/v1_workflow_node_type.py +44 -0
- eval_studio_client/api/models/v1_workflow_node_view.py +38 -0
- eval_studio_client/api/models/v1_workflow_type.py +37 -0
- eval_studio_client/api/models/workflow_service_clone_workflow_request.py +95 -0
- eval_studio_client/api/rest.py +1 -1
- eval_studio_client/api/test/test_adversarial_inputs_service_api.py +37 -0
- eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +128 -0
- eval_studio_client/api/test/test_dashboard_service_api.py +1 -1
- eval_studio_client/api/test/test_document_service_api.py +1 -1
- eval_studio_client/api/test/test_evaluation_service_api.py +1 -1
- eval_studio_client/api/test/test_evaluator_service_api.py +1 -1
- eval_studio_client/api/test/test_human_calibration_service_api.py +38 -0
- eval_studio_client/api/test/test_info_service_api.py +1 -1
- eval_studio_client/api/test/test_leaderboard_report_service_api.py +37 -0
- eval_studio_client/api/test/test_leaderboard_service_api.py +1 -1
- eval_studio_client/api/test/test_model_service_api.py +1 -1
- eval_studio_client/api/test/test_operation_progress_service_api.py +1 -1
- eval_studio_client/api/test/test_operation_service_api.py +7 -1
- eval_studio_client/api/test/test_perturbation_service_api.py +1 -1
- eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +25 -3
- eval_studio_client/api/test/test_perturbator_service_api.py +1 -1
- eval_studio_client/api/test/test_prompt_generation_service_api.py +1 -1
- eval_studio_client/api/test/test_prompt_generation_service_auto_generate_prompts_request.py +21 -5
- eval_studio_client/api/test/test_prompt_library_service_api.py +43 -0
- eval_studio_client/api/test/test_protobuf_any.py +1 -1
- eval_studio_client/api/test/test_protobuf_null_value.py +33 -0
- eval_studio_client/api/test/test_required_the_dashboard_to_update.py +3 -2
- eval_studio_client/api/test/test_required_the_document_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_model_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_operation_to_finalize.py +1 -1
- eval_studio_client/api/test/test_required_the_operation_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_test_case_to_update.py +9 -2
- eval_studio_client/api/test/test_required_the_test_to_update.py +3 -2
- eval_studio_client/api/test/test_required_the_updated_workflow.py +92 -0
- eval_studio_client/api/test/test_required_the_updated_workflow_node.py +81 -0
- eval_studio_client/api/test/test_rpc_status.py +1 -1
- eval_studio_client/api/test/test_test_case_relationship_service_api.py +37 -0
- eval_studio_client/api/test/test_test_case_service_api.py +1 -1
- eval_studio_client/api/test/test_test_case_service_batch_delete_test_cases_request.py +1 -1
- eval_studio_client/api/test/test_test_class_service_api.py +1 -1
- eval_studio_client/api/test/test_test_lab_service_api.py +1 -1
- eval_studio_client/api/test/test_test_service_api.py +25 -1
- eval_studio_client/api/test/test_test_service_clone_test_request.py +52 -0
- eval_studio_client/api/test/test_test_service_generate_test_cases_request.py +17 -2
- eval_studio_client/api/test/test_test_service_import_test_cases_from_library_request.py +56 -0
- eval_studio_client/api/test/test_test_service_list_test_case_library_items_request.py +63 -0
- eval_studio_client/api/test/test_test_service_perturb_test_in_place_request.py +59 -0
- eval_studio_client/api/test/test_test_service_perturb_test_request.py +5 -2
- eval_studio_client/api/test/test_v1_abort_operation_response.py +71 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_dashboards_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_delete_documents_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_documents_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_evaluators_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_evaluators_response.py +4 -2
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_models_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_test_cases_response.py +9 -2
- eval_studio_client/api/test/test_v1_batch_delete_tests_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_delete_workflows_request.py +53 -0
- eval_studio_client/api/test/test_v1_batch_delete_workflows_response.py +95 -0
- eval_studio_client/api/test/test_v1_batch_get_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_get_documents_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_operations_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_get_workflow_edges_response.py +64 -0
- eval_studio_client/api/test/test_v1_batch_get_workflow_nodes_response.py +84 -0
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_import_tests_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_import_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_check_base_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_clone_test_response.py +68 -0
- eval_studio_client/api/test/test_v1_clone_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_collection_info.py +1 -1
- eval_studio_client/api/test/test_v1_context.py +59 -0
- eval_studio_client/api/test/test_v1_create_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_create_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_evaluation_request.py +25 -3
- eval_studio_client/api/test/test_v1_create_evaluator_response.py +4 -2
- eval_studio_client/api/test/test_v1_create_leaderboard_request.py +1 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_perturbation_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_create_test_lab_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_create_workflow_edge_response.py +62 -0
- eval_studio_client/api/test/test_v1_create_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_create_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_dashboard.py +3 -2
- eval_studio_client/api/test/test_v1_dashboard_status.py +1 -1
- eval_studio_client/api/test/test_v1_dashboard_type.py +33 -0
- eval_studio_client/api/test/test_v1_delete_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_delete_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_delete_evaluator_response.py +4 -2
- eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_delete_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_delete_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_delete_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_delete_workflow_edge_response.py +62 -0
- eval_studio_client/api/test/test_v1_delete_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_delete_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_dependency_list.py +56 -0
- eval_studio_client/api/test/test_v1_document.py +1 -1
- eval_studio_client/api/test/test_v1_estimate_threshold_request.py +60 -0
- eval_studio_client/api/test/test_v1_evaluation_test.py +9 -2
- eval_studio_client/api/test/test_v1_evaluator.py +4 -2
- eval_studio_client/api/test/test_v1_evaluator_param_type.py +1 -1
- eval_studio_client/api/test/test_v1_evaluator_parameter.py +1 -1
- eval_studio_client/api/test/test_v1_evaluator_view.py +1 -1
- eval_studio_client/api/test/test_v1_finalize_operation_response.py +1 -1
- eval_studio_client/api/test/test_v1_find_all_test_cases_by_id_response.py +9 -2
- eval_studio_client/api/test/test_v1_find_test_lab_response.py +1 -1
- eval_studio_client/api/test/test_v1_generate_test_cases_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_get_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_evaluator_response.py +4 -2
- eval_studio_client/api/test/test_v1_get_guardrails_configuration_response.py +51 -0
- eval_studio_client/api/test/test_v1_get_info_response.py +7 -2
- eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +173 -0
- eval_studio_client/api/test/test_v1_get_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_operation_progress_by_parent_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_operation_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_perturbator_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_get_test_class_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_get_workflow_node_prerequisites_response.py +56 -0
- eval_studio_client/api/test/test_v1_get_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_get_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_import_evaluation_request.py +17 -2
- eval_studio_client/api/test/test_v1_import_leaderboard_request.py +1 -1
- eval_studio_client/api/test/test_v1_import_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +71 -0
- eval_studio_client/api/test/test_v1_import_test_cases_request.py +57 -0
- eval_studio_client/api/test/test_v1_info.py +7 -2
- eval_studio_client/api/test/test_v1_init_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_insight.py +1 -1
- eval_studio_client/api/test/test_v1_labeled_test_case.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard.py +1 -1
- eval_studio_client/api/test/test_v1_leaderboard_report.py +172 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_data.py +52 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +56 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_evaluator.py +114 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_evaluator_parameter.py +63 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_explanation.py +58 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_metrics_meta_entry.py +66 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_model.py +60 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_result.py +92 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_result_relationship.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard_status.py +1 -1
- eval_studio_client/api/test/test_v1_leaderboard_type.py +1 -1
- eval_studio_client/api/test/test_v1_leaderboard_view.py +1 -1
- eval_studio_client/api/test/test_v1_list_base_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_documents_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_evaluators_response.py +4 -2
- eval_studio_client/api/test/test_v1_list_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_llm_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_model_collections_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_most_recent_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_most_recent_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_most_recent_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_operations_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_perturbators_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_prompt_library_items_response.py +71 -0
- eval_studio_client/api/test/test_v1_list_rag_collections_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_test_case_library_items_response.py +71 -0
- eval_studio_client/api/test/test_v1_list_test_case_relationships_response.py +56 -0
- eval_studio_client/api/test/test_v1_list_test_cases_response.py +9 -2
- eval_studio_client/api/test/test_v1_list_test_classes_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_workflow_dependencies_response.py +93 -0
- eval_studio_client/api/test/test_v1_list_workflows_response.py +95 -0
- eval_studio_client/api/test/test_v1_metric_score.py +52 -0
- eval_studio_client/api/test/test_v1_metric_scores.py +55 -0
- eval_studio_client/api/test/test_v1_model.py +1 -1
- eval_studio_client/api/test/test_v1_model_type.py +1 -1
- eval_studio_client/api/test/test_v1_operation.py +1 -1
- eval_studio_client/api/test/test_v1_operation_progress.py +1 -1
- eval_studio_client/api/test/test_v1_perturb_test_in_place_response.py +68 -0
- eval_studio_client/api/test/test_v1_perturb_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_perturbator.py +1 -1
- eval_studio_client/api/test/test_v1_perturbator_configuration.py +1 -1
- eval_studio_client/api/test/test_v1_perturbator_intensity.py +1 -1
- eval_studio_client/api/test/test_v1_problem_and_action.py +1 -1
- eval_studio_client/api/test/test_v1_process_workflow_node_response.py +71 -0
- eval_studio_client/api/test/test_v1_prompt_library_item.py +68 -0
- eval_studio_client/api/test/test_v1_repeated_context.py +62 -0
- eval_studio_client/api/test/test_v1_repeated_string.py +53 -0
- eval_studio_client/api/test/test_v1_reset_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_test.py +3 -2
- eval_studio_client/api/test/test_v1_test_case.py +9 -2
- eval_studio_client/api/test/test_v1_test_case_relationship.py +1 -1
- eval_studio_client/api/test/test_v1_test_cases_generator.py +1 -1
- eval_studio_client/api/test/test_v1_test_class.py +1 -1
- eval_studio_client/api/test/test_v1_test_class_type.py +1 -1
- eval_studio_client/api/test/test_v1_test_lab.py +1 -1
- eval_studio_client/api/test/test_v1_test_suite_evaluates.py +33 -0
- eval_studio_client/api/test/test_v1_test_type.py +33 -0
- eval_studio_client/api/test/test_v1_update_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_update_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_operation_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_update_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_update_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_update_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_who_am_i_response.py +1 -1
- eval_studio_client/api/test/test_v1_workflow.py +93 -0
- eval_studio_client/api/test/test_v1_workflow_dependency.py +52 -0
- eval_studio_client/api/test/test_v1_workflow_edge.py +61 -0
- eval_studio_client/api/test/test_v1_workflow_edge_type.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node.py +82 -0
- eval_studio_client/api/test/test_v1_workflow_node_artifact.py +62 -0
- eval_studio_client/api/test/test_v1_workflow_node_artifacts.py +65 -0
- eval_studio_client/api/test/test_v1_workflow_node_attributes.py +51 -0
- eval_studio_client/api/test/test_v1_workflow_node_status.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node_type.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node_view.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_type.py +33 -0
- eval_studio_client/api/test/test_who_am_i_service_api.py +1 -1
- eval_studio_client/api/test/test_workflow_edge_service_api.py +52 -0
- eval_studio_client/api/test/test_workflow_node_service_api.py +94 -0
- eval_studio_client/api/test/test_workflow_service_api.py +93 -0
- eval_studio_client/api/test/test_workflow_service_clone_workflow_request.py +55 -0
- eval_studio_client/client.py +7 -0
- eval_studio_client/dashboards.py +29 -0
- eval_studio_client/gen/openapiv2/eval_studio.swagger.json +5318 -1884
- eval_studio_client/leaderboards.py +123 -0
- eval_studio_client/models.py +3 -42
- eval_studio_client/test_labs.py +49 -21
- eval_studio_client/tests.py +290 -8
- {eval_studio_client-1.0.3a1.dist-info → eval_studio_client-1.1.0.dist-info}/METADATA +1 -2
- eval_studio_client-1.1.0.dist-info/RECORD +732 -0
- eval_studio_client-1.0.3a1.dist-info/RECORD +0 -486
- {eval_studio_client-1.0.3a1.dist-info → eval_studio_client-1.1.0.dist-info}/WHEEL +0 -0
eval_studio_client/tests.py
CHANGED
|
@@ -13,6 +13,9 @@ from eval_studio_client import documents as d7s
|
|
|
13
13
|
from eval_studio_client import perturbators as p10s
|
|
14
14
|
from eval_studio_client import utils
|
|
15
15
|
from eval_studio_client.api import models
|
|
16
|
+
from eval_studio_client.api.models import (
|
|
17
|
+
test_service_clone_test_request as clone_test_request,
|
|
18
|
+
)
|
|
16
19
|
|
|
17
20
|
|
|
18
21
|
class TestCaseGenerator(enum.Enum):
|
|
@@ -87,7 +90,6 @@ class TestCaseGenerator(enum.Enum):
|
|
|
87
90
|
|
|
88
91
|
@dataclasses.dataclass
|
|
89
92
|
class _TestCaseGenerationHandle:
|
|
90
|
-
|
|
91
93
|
name: Any | None
|
|
92
94
|
progress: Optional[float] = None
|
|
93
95
|
progress_message: Optional[str] = None
|
|
@@ -118,6 +120,70 @@ class _TestCaseGenerationHandle:
|
|
|
118
120
|
)
|
|
119
121
|
|
|
120
122
|
|
|
123
|
+
@dataclasses.dataclass
class _TestCaseLibraryGetHandle(_TestCaseGenerationHandle):
    """Handle describing the state of a "get test cases from library" operation."""

    @staticmethod
    def _from_operation(
        res: (
            models.V1ImportTestCasesFromLibraryResponse | models.V1GetOperationResponse
        ),
    ) -> "_TestCaseLibraryGetHandle":
        """Builds a handle from an API response that carries an operation.

        Args:
            res: Response whose ``operation`` attribute is read.

        Returns:
            _TestCaseLibraryGetHandle: A handle with ``name=None`` when the response
                has no operation, otherwise a handle filled from the operation
                (name, progress, progress message, error and done flag).
        """
        operation: models.V1Operation | None = res.operation
        if not operation:
            return _TestCaseLibraryGetHandle(name=None)

        # Progress details are read from the operation metadata, when there is any.
        metadata = getattr(operation, "metadata", None)
        details = (metadata.to_dict() or {}) if metadata else {}

        return _TestCaseLibraryGetHandle(
            name=operation.name,
            progress=details.get("progress"),
            progress_message=details.get("progressMessage"),
            error=operation.error,
            done=operation.done,
        )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclasses.dataclass
class TestCaseLibraryItem:
    """A single item of the test case library, i.e. a test suite."""

    key: str
    name: str
    description: str
    test_suite_url: str
    test_count: int
    test_case_count: int
    evaluates: List[str]
    categories: List[str]

    @staticmethod
    def _from_api_items(
        api_items: "List[models.V1PromptLibraryItem]",
    ) -> List["TestCaseLibraryItem"]:
        """Converts API library items to client library items.

        Missing (falsy) string fields become ``""``, missing counts become ``0``
        and missing ``evaluates``/``categories`` become empty lists.

        Args:
            api_items: API items to convert; a falsy value yields an empty list.

        Returns:
            List[TestCaseLibraryItem]: Converted items, in the input order.
        """
        converted: List["TestCaseLibraryItem"] = []
        for item in api_items or []:
            converted.append(
                TestCaseLibraryItem(
                    key=item.name or "",
                    name=item.display_name or "",
                    description=item.description or "",
                    test_suite_url=item.test_suite_url or "",
                    test_count=item.test_count or 0,
                    test_case_count=item.test_case_count or 0,
                    evaluates=list(item.evaluates or []),
                    categories=list(item.categories or []),
                )
            )
        return converted
|
|
185
|
+
|
|
186
|
+
|
|
121
187
|
@dataclasses.dataclass
|
|
122
188
|
class TestCase:
|
|
123
189
|
"""Represents a single test case, which contains tested prompt, expected answer
|
|
@@ -162,6 +228,31 @@ class TestCase:
|
|
|
162
228
|
)
|
|
163
229
|
|
|
164
230
|
|
|
231
|
+
@dataclasses.dataclass
class TestCaseRelationship:
    """Represents a typed relationship from a source test case to a target one.

    The three values are plain strings which are copied to and from the
    ``source``, ``target`` and ``type`` fields of the API model.
    """

    # Copied to/from the API model's ``source`` field.
    source_test_case_key: str
    # Copied to/from the API model's ``target`` field.
    target_test_case_key: str
    # Copied to/from the API model's ``type`` field.
    relationship_type: str

    def to_api_proto(self) -> "models.V1TestCaseRelationship":
        """Converts the client TestCaseRelationship to an API TestCaseRelationship."""
        return models.V1TestCaseRelationship(
            source=self.source_test_case_key,
            target=self.target_test_case_key,
            type=self.relationship_type,
        )

    @staticmethod
    def _from_api_test_case_rel(
        api_test_case_rel: "models.V1TestCaseRelationship",
    ) -> "TestCaseRelationship":
        """Converts an API TestCaseRelationship to the client TestCaseRelationship.

        Args:
            api_test_case_rel: API relationship whose ``source``, ``target`` and
                ``type`` attributes are read.

        Returns:
            TestCaseRelationship: Client relationship; any missing (falsy) value
                is replaced with an empty string.
        """
        return TestCaseRelationship(
            source_test_case_key=api_test_case_rel.source or "",
            target_test_case_key=api_test_case_rel.target or "",
            relationship_type=api_test_case_rel.type or "",
        )
|
|
254
|
+
|
|
255
|
+
|
|
165
256
|
@dataclasses.dataclass
|
|
166
257
|
class Test:
|
|
167
258
|
"""Represents a test, which contains a set of test cases and optionally
|
|
@@ -183,6 +274,7 @@ class Test:
|
|
|
183
274
|
update_time: Optional[datetime.datetime] = None
|
|
184
275
|
_client: Optional[api.ApiClient] = None
|
|
185
276
|
_gen_tc_op_name: Optional[str] = None
|
|
277
|
+
_lib_tc_op_name: Optional[str] = None
|
|
186
278
|
|
|
187
279
|
def __post_init__(self):
|
|
188
280
|
if self._client:
|
|
@@ -190,6 +282,7 @@ class Test:
|
|
|
190
282
|
self._test_case_api = api.TestCaseServiceApi(self._client)
|
|
191
283
|
self._document_api = api.DocumentServiceApi(self._client)
|
|
192
284
|
self._operation_api = api.OperationServiceApi(self._client)
|
|
285
|
+
self._relationships_api = api.TestCaseRelationshipServiceApi(self._client)
|
|
193
286
|
|
|
194
287
|
@property
|
|
195
288
|
def test_cases(self) -> List[TestCase]:
|
|
@@ -200,6 +293,19 @@ class Test:
|
|
|
200
293
|
|
|
201
294
|
return []
|
|
202
295
|
|
|
296
|
+
@property
def test_case_relationships(self) -> List[TestCaseRelationship]:
    """Lists the relationships among the test cases of this test.

    Returns:
        List[TestCaseRelationship]: Relationships returned by the API for this
            test, or an empty list when the response carries none.
    """
    rel_api = self._relationships_api
    response = rel_api.test_case_relationship_service_list_test_case_relationships(
        self.key
    )
    if not response or not response.test_case_relationships:
        return []

    relationships = []
    for api_rel in response.test_case_relationships:
        relationships.append(TestCaseRelationship._from_api_test_case_rel(api_rel))
    return relationships
|
|
308
|
+
|
|
203
309
|
@property
|
|
204
310
|
def documents(self) -> List[d7s.Document]:
|
|
205
311
|
"""Retrieves all documents attached to the test."""
|
|
@@ -222,12 +328,12 @@ class Test:
|
|
|
222
328
|
perturbators: Union[p10s.Perturbator, str, List[Union[p10s.Perturbator, str]]],
|
|
223
329
|
new_test_description: str = "",
|
|
224
330
|
) -> "Test":
|
|
225
|
-
"""Creates new Test by perturbing this test using the given
|
|
331
|
+
"""Creates new Test by perturbing this test using the given perturbators.
|
|
226
332
|
|
|
227
333
|
Args:
|
|
228
334
|
new_test_name (str): Name of the newly created test.
|
|
229
|
-
perturbators (Perturbator, List[Perturbator], str or List[str]): List of
|
|
230
|
-
their keys used to perturbate this Test.
|
|
335
|
+
perturbators (Perturbator, List[Perturbator], str or List[str]): List of
|
|
336
|
+
perturbators or their keys used to perturbate this Test.
|
|
231
337
|
new_test_description (str): Optional description of the newly created test.
|
|
232
338
|
"""
|
|
233
339
|
|
|
@@ -248,13 +354,46 @@ class Test:
|
|
|
248
354
|
configs = [_PerturbatorConfiguration(p) for p in perturbators_to_run]
|
|
249
355
|
|
|
250
356
|
req = models.TestServicePerturbTestRequest(
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
357
|
+
perturbator_configurations=[c.to_api_proto() for c in configs],
|
|
358
|
+
new_test_display_name=new_test_name,
|
|
359
|
+
new_test_description=new_test_description,
|
|
254
360
|
)
|
|
255
361
|
resp = self._test_api.test_service_perturb_test(self.key, req)
|
|
256
362
|
return Test._from_api_test(resp.test, self._client)
|
|
257
363
|
|
|
364
|
+
def perturb_in_place(
    self,
    perturbators: Union[p10s.Perturbator, str, List[Union[p10s.Perturbator, str]]],
    test_case_names: Optional[List[str]] = None,
) -> str:
    """In-place (in-test) perturbation of test cases using the given perturbators.

    Args:
        perturbators (Perturbator, List[Perturbator], str or List[str]): List of
            perturbators or their keys used to perturbate this Test.
        test_case_names (List[str]): List of test case names to perturbate.

    Returns:
        str: Name of the test carried by the API response.

    Raises:
        RuntimeError: If the client is not set.
        ValueError: If no perturbators are provided.
    """

    if self._client is None:
        raise RuntimeError("Client is not set.")

    if not perturbators:
        raise ValueError("Perturbators must be provided.")

    # Accept a single perturbator (or key) as well as a list of them.
    if isinstance(perturbators, (p10s.Perturbator, str)):
        perturbators_to_run = [perturbators]
    else:
        perturbators_to_run = perturbators

    configs = [_PerturbatorConfiguration(p) for p in perturbators_to_run]

    req = models.TestServicePerturbTestInPlaceRequest(
        perturbator_configurations=[c.to_api_proto() for c in configs],
        test_case_names=test_case_names,
    )
    # The in-place request has to be sent to the in-place endpoint;
    # ``test_service_perturb_test`` is the call used for creating a new test.
    resp = self._test_api.test_service_perturb_test_in_place(self.key, req)
    return resp.test.name
|
|
396
|
+
|
|
258
397
|
def generate_test_cases(
|
|
259
398
|
self,
|
|
260
399
|
count: int,
|
|
@@ -267,7 +406,7 @@ class Test:
|
|
|
267
406
|
|
|
268
407
|
Args:
|
|
269
408
|
count (int): Number of test cases to generate (generator may return fewer
|
|
270
|
-
|
|
409
|
+
prompts).
|
|
271
410
|
model (str): Model to use for generating the prompts.
|
|
272
411
|
base_llm_model (str): Base LLM model to use for generating the prompts.
|
|
273
412
|
generators (List[TestCaseGenerator]): Methods to use for generation.
|
|
@@ -342,6 +481,127 @@ class Test:
|
|
|
342
481
|
|
|
343
482
|
raise TimeoutError("Waiting timeout has been reached.")
|
|
344
483
|
|
|
484
|
+
def list_test_suite_library_items(
    self,
    filter_by_categories: Optional[List[str]] = None,
    filter_by_purposes: Optional[List[str]] = None,
    filter_by_evaluates: Optional[List[str]] = None,
    filter_by_origin: Optional[str] = None,
    filter_by_test_case_count: Optional[int] = None,
    filter_by_test_count: Optional[int] = None,
    filter_by_fts: Optional[str] = None,
) -> List[TestCaseLibraryItem]:
    """Lists the items (suites of tests) available in the library.

    Every filter is optional and is handed over to the API request as given.

    Args:
        filter_by_categories (List[str]): Categories to filter the library items.
        filter_by_purposes (List[str]): Purposes to filter the library items.
        filter_by_evaluates (List[str]): Evaluates to filter the library items.
        filter_by_origin (str): Origin to filter the library items.
        filter_by_test_case_count (int): Test case count to filter
            the library items.
        filter_by_test_count (int): Test count to filter the library items.
        filter_by_fts (str): Phrase to search for (FTS) in the library items.

    Returns:
        List[TestCaseLibraryItem]: Library items, or an empty list when
            the response carries none.
    """
    request = models.TestServiceListTestCaseLibraryItemsRequest(
        filter_by_categories=filter_by_categories,
        filter_by_purposes=filter_by_purposes,
        filter_by_evaluates=filter_by_evaluates,
        filter_by_origin=filter_by_origin,
        filter_by_test_case_count=filter_by_test_case_count,
        filter_by_test_count=filter_by_test_count,
        filter_by_fts=filter_by_fts,
    )
    response = self._test_api.test_service_list_test_case_library_items(
        self.key, request
    )

    if not response or not response.prompt_library_items:
        return []

    return TestCaseLibraryItem._from_api_items(response.prompt_library_items)
|
|
527
|
+
|
|
528
|
+
def add_library_test_cases(
    self, test_suite_url: str, count: int, test_document_urls: Optional[List[str]]
) -> None:
    """Samples test cases from a library test suite and adds them to the test.

    The name of the operation returned by the API is stored on the test so that
    it can be waited for afterwards.

    Args:
        test_suite_url (str): The URL of the library test suite to get (sample)
            TestCases from.
        count (int): The number of TestCases to get from the library.
        test_document_urls (List[str]): The list of target Test corpus
            document URLs to skip when returning library TestCases corpus.
    """
    response = self._test_api.test_service_import_test_cases_from_library(
        self.key,
        models.TestServiceImportTestCasesFromLibraryRequest(
            test_suite_url=test_suite_url,
            count=count,
            test_document_urls=test_document_urls,
        ),
    )

    # Remember the operation name; cleared when the response has no operation.
    operation = response.operation
    if operation:
        self._lib_tc_op_name = operation.name
    else:
        self._lib_tc_op_name = None
|
|
550
|
+
|
|
551
|
+
def wait_for_library_test_case_get(
    self, timeout: Optional[float] = None, verbose: bool = False
) -> None:
    """Waits for the library test cases(s) sampling to finish.

    The operation is polled with an exponential backoff (sleep capped at 8 s).

    Args:
        timeout (float): The maximum time to wait in seconds. When not given
            (or 0), 2 days are used.
        verbose (bool): If True, prints the status of the handle while waiting.

    Raises:
        ValueError: If no library operation name is set on the test, or if
            the client is not set.
        RuntimeError: If the operation finished with an error.
        TimeoutError: If the operation did not finish within the timeout.
    """
    if not self._lib_tc_op_name:
        raise ValueError(
            "There is no ongoing getting of test case(s) from the library - "
            "the operation name is not set."
        )

    if verbose:
        print(
            f"Waiting for getting library test case(s) operation to finish "
            f"({self._lib_tc_op_name}):"
        )

    if not self._client:
        raise ValueError("Unable to establish a connection to the Eval Studio host.")

    # exponential backoff
    wait_time = 1.0
    wait_coef = 1.6
    wait_max = 8.0
    wait_total = 0.0
    timeout = timeout or float(2 * 24 * 60 * 60)  # 2 days
    progress_bar = utils.ProgressBar()
    while wait_total < timeout:
        handle = _TestCaseLibraryGetHandle._from_operation(
            self._operation_api.operation_service_get_operation(self._lib_tc_op_name)
        )

        if verbose:
            progress_bar.update(handle.progress or 0, handle.progress_message)

        if handle.done:
            if handle.error:
                raise RuntimeError(
                    f"Getting of library test case(s) failed: {handle.error}"
                )
            return

        wait_time *= wait_coef
        pause = min(wait_time, wait_max)
        time.sleep(pause)
        # Track the time slept, otherwise the loop condition never changes and
        # the timeout below can never be reached.
        wait_total += pause

    raise TimeoutError("Waiting timeout has been reached.")
|
|
604
|
+
|
|
345
605
|
def delete(self, force=False):
|
|
346
606
|
"""Deletes the test.
|
|
347
607
|
|
|
@@ -507,6 +767,28 @@ class _Tests:
|
|
|
507
767
|
|
|
508
768
|
return None
|
|
509
769
|
|
|
770
|
+
def clone(
    self, key: str, name: Optional[str] = "", description: Optional[str] = ""
) -> Optional[Test]:
    """Clones an existing test in the Eval Studio.

    Args:
        key (str): Resource name of the test to be cloned.
        name (str): Optional new name of the cloned test.
        description (str): Optional new description of the cloned test.

    Returns:
        Optional[Test]: The cloned test, or None when the response carries
            no test.
    """
    request = clone_test_request.TestServiceCloneTestRequest(
        new_test_display_name=name, new_test_description=description
    )
    response = self._api.test_service_clone_test(key, body=request)

    if not response or not response.test:
        return None

    return Test._from_api_test(response.test, self._client)
|
|
791
|
+
|
|
510
792
|
def delete(self, key: str):
|
|
511
793
|
"""Deletes the test with given resource name.
|
|
512
794
|
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: eval-studio-client
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
|
|
5
5
|
Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
|
|
6
6
|
Author-email: "H2O.ai" <support@h2o.ai>
|
|
7
|
-
License-Expression: MIT
|
|
8
7
|
Classifier: Development Status :: 4 - Beta
|
|
9
8
|
Classifier: Programming Language :: Python
|
|
10
9
|
Classifier: Programming Language :: Python :: 3.9
|