eval-studio-client 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_studio_client/api/__init__.py +83 -1
- eval_studio_client/api/api/__init__.py +8 -0
- eval_studio_client/api/api/adversarial_inputs_service_api.py +321 -0
- eval_studio_client/api/api/dashboard_service_api.py +18 -1
- eval_studio_client/api/api/document_service_api.py +1 -1
- eval_studio_client/api/api/evaluation_service_api.py +1 -1
- eval_studio_client/api/api/evaluator_service_api.py +1 -1
- eval_studio_client/api/api/human_calibration_service_api.py +304 -0
- eval_studio_client/api/api/info_service_api.py +1 -1
- eval_studio_client/api/api/leaderboard_report_service_api.py +292 -0
- eval_studio_client/api/api/leaderboard_service_api.py +17 -17
- eval_studio_client/api/api/model_service_api.py +17 -17
- eval_studio_client/api/api/operation_progress_service_api.py +1 -1
- eval_studio_client/api/api/operation_service_api.py +272 -17
- eval_studio_client/api/api/perturbation_service_api.py +1 -1
- eval_studio_client/api/api/perturbator_service_api.py +285 -18
- eval_studio_client/api/api/prompt_generation_service_api.py +1 -1
- eval_studio_client/api/api/prompt_library_service_api.py +669 -0
- eval_studio_client/api/api/test_case_relationship_service_api.py +292 -0
- eval_studio_client/api/api/test_case_service_api.py +17 -17
- eval_studio_client/api/api/test_class_service_api.py +17 -17
- eval_studio_client/api/api/test_lab_service_api.py +1 -1
- eval_studio_client/api/api/test_service_api.py +1272 -102
- eval_studio_client/api/api/who_am_i_service_api.py +1 -1
- eval_studio_client/api/api/workflow_edge_service_api.py +835 -0
- eval_studio_client/api/api/workflow_node_service_api.py +2431 -0
- eval_studio_client/api/api/workflow_service_api.py +2403 -0
- eval_studio_client/api/api_client.py +1 -1
- eval_studio_client/api/configuration.py +1 -1
- eval_studio_client/api/docs/AdversarialInputsServiceApi.md +78 -0
- eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +45 -0
- eval_studio_client/api/docs/DashboardServiceApi.md +4 -2
- eval_studio_client/api/docs/HumanCalibrationServiceApi.md +77 -0
- eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -0
- eval_studio_client/api/docs/LeaderboardServiceApi.md +5 -5
- eval_studio_client/api/docs/ModelServiceApi.md +5 -5
- eval_studio_client/api/docs/OperationServiceApi.md +72 -5
- eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +1 -0
- eval_studio_client/api/docs/PerturbatorServiceApi.md +38 -8
- eval_studio_client/api/docs/PromptGenerationServiceAutoGeneratePromptsRequest.md +4 -2
- eval_studio_client/api/docs/PromptLibraryServiceApi.md +155 -0
- eval_studio_client/api/docs/ProtobufNullValue.md +12 -0
- eval_studio_client/api/docs/RequiredTheDashboardToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +3 -0
- eval_studio_client/api/docs/RequiredTheTestToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheUpdatedWorkflow.md +47 -0
- eval_studio_client/api/docs/RequiredTheUpdatedWorkflowNode.md +44 -0
- eval_studio_client/api/docs/TestCaseRelationshipServiceApi.md +75 -0
- eval_studio_client/api/docs/TestCaseServiceApi.md +5 -5
- eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
- eval_studio_client/api/docs/TestServiceApi.md +293 -9
- eval_studio_client/api/docs/TestServiceCloneTestRequest.md +30 -0
- eval_studio_client/api/docs/TestServiceGenerateTestCasesRequest.md +3 -1
- eval_studio_client/api/docs/TestServiceImportTestCasesFromLibraryRequest.md +32 -0
- eval_studio_client/api/docs/TestServiceListTestCaseLibraryItemsRequest.md +35 -0
- eval_studio_client/api/docs/TestServicePerturbTestInPlaceRequest.md +30 -0
- eval_studio_client/api/docs/TestServicePerturbTestRequest.md +1 -0
- eval_studio_client/api/docs/V1AbortOperationResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchDeleteWorkflowsRequest.md +29 -0
- eval_studio_client/api/docs/V1BatchDeleteWorkflowsResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchGetWorkflowEdgesResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchGetWorkflowNodesResponse.md +29 -0
- eval_studio_client/api/docs/V1CloneTestResponse.md +29 -0
- eval_studio_client/api/docs/V1CloneWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1Context.md +37 -0
- eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1CreateWorkflowEdgeResponse.md +29 -0
- eval_studio_client/api/docs/V1CreateWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1CreateWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1Dashboard.md +1 -0
- eval_studio_client/api/docs/V1DashboardType.md +12 -0
- eval_studio_client/api/docs/V1DeleteWorkflowEdgeResponse.md +29 -0
- eval_studio_client/api/docs/V1DeleteWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1DeleteWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1DependencyList.md +30 -0
- eval_studio_client/api/docs/V1EstimateThresholdRequest.md +33 -0
- eval_studio_client/api/docs/V1Evaluator.md +2 -0
- eval_studio_client/api/docs/V1GetGuardrailsConfigurationResponse.md +29 -0
- eval_studio_client/api/docs/V1GetLeaderboardReportResponse.md +29 -0
- eval_studio_client/api/docs/V1GetWorkflowNodePrerequisitesResponse.md +30 -0
- eval_studio_client/api/docs/V1GetWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1GetWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1ImportEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1ImportTestCasesFromLibraryResponse.md +29 -0
- eval_studio_client/api/docs/V1ImportTestCasesRequest.md +33 -0
- eval_studio_client/api/docs/V1Info.md +3 -0
- eval_studio_client/api/docs/V1InitWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1LabeledTestCase.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardReport.md +32 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputData.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardReportEvaluator.md +42 -0
- eval_studio_client/api/docs/V1LeaderboardReportEvaluatorParameter.md +38 -0
- eval_studio_client/api/docs/V1LeaderboardReportExplanation.md +34 -0
- eval_studio_client/api/docs/V1LeaderboardReportMetricsMetaEntry.md +41 -0
- eval_studio_client/api/docs/V1LeaderboardReportModel.md +37 -0
- eval_studio_client/api/docs/V1LeaderboardReportResult.md +45 -0
- eval_studio_client/api/docs/V1LeaderboardReportResultRelationship.md +32 -0
- eval_studio_client/api/docs/V1ListPromptLibraryItemsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListTestCaseLibraryItemsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListTestCaseRelationshipsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListWorkflowDependenciesResponse.md +30 -0
- eval_studio_client/api/docs/V1ListWorkflowsResponse.md +29 -0
- eval_studio_client/api/docs/V1MetricScore.md +31 -0
- eval_studio_client/api/docs/V1MetricScores.md +29 -0
- eval_studio_client/api/docs/V1PerturbTestInPlaceResponse.md +29 -0
- eval_studio_client/api/docs/V1ProcessWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1PromptLibraryItem.md +42 -0
- eval_studio_client/api/docs/V1RepeatedContext.md +29 -0
- eval_studio_client/api/docs/V1RepeatedString.md +29 -0
- eval_studio_client/api/docs/V1ResetWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1Test.md +1 -0
- eval_studio_client/api/docs/V1TestCase.md +3 -0
- eval_studio_client/api/docs/V1TestSuiteEvaluates.md +11 -0
- eval_studio_client/api/docs/V1TestType.md +12 -0
- eval_studio_client/api/docs/V1UpdateWorkflowNodeResponse.md +29 -0
- eval_studio_client/api/docs/V1UpdateWorkflowResponse.md +29 -0
- eval_studio_client/api/docs/V1Workflow.md +49 -0
- eval_studio_client/api/docs/V1WorkflowDependency.md +30 -0
- eval_studio_client/api/docs/V1WorkflowEdge.md +40 -0
- eval_studio_client/api/docs/V1WorkflowEdgeType.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNode.md +46 -0
- eval_studio_client/api/docs/V1WorkflowNodeArtifact.md +41 -0
- eval_studio_client/api/docs/V1WorkflowNodeArtifacts.md +29 -0
- eval_studio_client/api/docs/V1WorkflowNodeAttributes.md +30 -0
- eval_studio_client/api/docs/V1WorkflowNodeStatus.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNodeType.md +12 -0
- eval_studio_client/api/docs/V1WorkflowNodeView.md +12 -0
- eval_studio_client/api/docs/V1WorkflowType.md +12 -0
- eval_studio_client/api/docs/WorkflowEdgeServiceApi.md +215 -0
- eval_studio_client/api/docs/WorkflowNodeServiceApi.md +632 -0
- eval_studio_client/api/docs/WorkflowServiceApi.md +623 -0
- eval_studio_client/api/docs/WorkflowServiceCloneWorkflowRequest.md +33 -0
- eval_studio_client/api/exceptions.py +1 -1
- eval_studio_client/api/models/__init__.py +75 -1
- eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +143 -0
- eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +9 -3
- eval_studio_client/api/models/prompt_generation_service_auto_generate_prompts_request.py +18 -6
- eval_studio_client/api/models/protobuf_any.py +1 -1
- eval_studio_client/api/models/protobuf_null_value.py +36 -0
- eval_studio_client/api/models/required_the_dashboard_to_update.py +6 -3
- eval_studio_client/api/models/required_the_document_to_update.py +1 -1
- eval_studio_client/api/models/required_the_leaderboard_to_update.py +1 -1
- eval_studio_client/api/models/required_the_model_to_update.py +1 -1
- eval_studio_client/api/models/required_the_operation_to_finalize.py +1 -1
- eval_studio_client/api/models/required_the_operation_to_update.py +1 -1
- eval_studio_client/api/models/required_the_test_case_to_update.py +14 -3
- eval_studio_client/api/models/required_the_test_to_update.py +6 -3
- eval_studio_client/api/models/required_the_updated_workflow.py +160 -0
- eval_studio_client/api/models/required_the_updated_workflow_node.py +152 -0
- eval_studio_client/api/models/rpc_status.py +1 -1
- eval_studio_client/api/models/test_case_service_batch_delete_test_cases_request.py +1 -1
- eval_studio_client/api/models/test_service_clone_test_request.py +89 -0
- eval_studio_client/api/models/test_service_generate_test_cases_request.py +16 -4
- eval_studio_client/api/models/test_service_import_test_cases_from_library_request.py +93 -0
- eval_studio_client/api/models/test_service_list_test_case_library_items_request.py +99 -0
- eval_studio_client/api/models/test_service_perturb_test_in_place_request.py +97 -0
- eval_studio_client/api/models/test_service_perturb_test_request.py +5 -3
- eval_studio_client/api/models/v1_abort_operation_response.py +91 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_request.py +1 -1
- eval_studio_client/api/models/v1_batch_create_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_dashboards_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_documents_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_documents_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_evaluators_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_evaluators_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_leaderboards_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_models_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_models_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_test_cases_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_tests_request.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_tests_response.py +1 -1
- eval_studio_client/api/models/v1_batch_delete_workflows_request.py +87 -0
- eval_studio_client/api/models/v1_batch_delete_workflows_response.py +95 -0
- eval_studio_client/api/models/v1_batch_get_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_documents_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_models_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_operations_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_tests_response.py +1 -1
- eval_studio_client/api/models/v1_batch_get_workflow_edges_response.py +95 -0
- eval_studio_client/api/models/v1_batch_get_workflow_nodes_response.py +95 -0
- eval_studio_client/api/models/v1_batch_import_leaderboard_request.py +1 -1
- eval_studio_client/api/models/v1_batch_import_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_batch_import_tests_request.py +1 -1
- eval_studio_client/api/models/v1_batch_import_tests_response.py +1 -1
- eval_studio_client/api/models/v1_check_base_models_response.py +1 -1
- eval_studio_client/api/models/v1_clone_test_response.py +91 -0
- eval_studio_client/api/models/v1_clone_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_collection_info.py +1 -1
- eval_studio_client/api/models/v1_context.py +103 -0
- eval_studio_client/api/models/v1_create_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_create_document_response.py +1 -1
- eval_studio_client/api/models/v1_create_evaluation_request.py +8 -3
- eval_studio_client/api/models/v1_create_evaluator_response.py +1 -1
- eval_studio_client/api/models/v1_create_leaderboard_request.py +1 -1
- eval_studio_client/api/models/v1_create_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_create_leaderboard_without_cache_response.py +1 -1
- eval_studio_client/api/models/v1_create_model_response.py +1 -1
- eval_studio_client/api/models/v1_create_perturbation_response.py +1 -1
- eval_studio_client/api/models/v1_create_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_create_test_lab_response.py +1 -1
- eval_studio_client/api/models/v1_create_test_response.py +1 -1
- eval_studio_client/api/models/v1_create_workflow_edge_response.py +91 -0
- eval_studio_client/api/models/v1_create_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_create_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_dashboard.py +6 -3
- eval_studio_client/api/models/v1_dashboard_status.py +1 -1
- eval_studio_client/api/models/v1_dashboard_type.py +38 -0
- eval_studio_client/api/models/v1_delete_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_delete_document_response.py +1 -1
- eval_studio_client/api/models/v1_delete_evaluator_response.py +1 -1
- eval_studio_client/api/models/v1_delete_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_delete_model_response.py +1 -1
- eval_studio_client/api/models/v1_delete_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_delete_test_response.py +1 -1
- eval_studio_client/api/models/v1_delete_workflow_edge_response.py +91 -0
- eval_studio_client/api/models/v1_delete_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_delete_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_dependency_list.py +97 -0
- eval_studio_client/api/models/v1_document.py +1 -1
- eval_studio_client/api/models/v1_estimate_threshold_request.py +103 -0
- eval_studio_client/api/models/v1_evaluation_test.py +1 -1
- eval_studio_client/api/models/v1_evaluator.py +12 -4
- eval_studio_client/api/models/v1_evaluator_param_type.py +1 -1
- eval_studio_client/api/models/v1_evaluator_parameter.py +1 -1
- eval_studio_client/api/models/v1_evaluator_view.py +1 -1
- eval_studio_client/api/models/v1_finalize_operation_response.py +1 -1
- eval_studio_client/api/models/v1_find_all_test_cases_by_id_response.py +1 -1
- eval_studio_client/api/models/v1_find_test_lab_response.py +1 -1
- eval_studio_client/api/models/v1_generate_test_cases_response.py +1 -1
- eval_studio_client/api/models/v1_get_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_get_document_response.py +1 -1
- eval_studio_client/api/models/v1_get_evaluator_response.py +1 -1
- eval_studio_client/api/models/v1_get_guardrails_configuration_response.py +87 -0
- eval_studio_client/api/models/v1_get_info_response.py +1 -1
- eval_studio_client/api/models/v1_get_leaderboard_report_response.py +91 -0
- eval_studio_client/api/models/v1_get_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_get_model_response.py +1 -1
- eval_studio_client/api/models/v1_get_operation_progress_by_parent_response.py +1 -1
- eval_studio_client/api/models/v1_get_operation_response.py +1 -1
- eval_studio_client/api/models/v1_get_perturbator_response.py +1 -1
- eval_studio_client/api/models/v1_get_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_get_test_class_response.py +1 -1
- eval_studio_client/api/models/v1_get_test_response.py +1 -1
- eval_studio_client/api/models/v1_get_workflow_node_prerequisites_response.py +89 -0
- eval_studio_client/api/models/v1_get_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_get_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_import_evaluation_request.py +8 -3
- eval_studio_client/api/models/v1_import_leaderboard_request.py +1 -1
- eval_studio_client/api/models/v1_import_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_import_test_cases_from_library_response.py +91 -0
- eval_studio_client/api/models/v1_import_test_cases_request.py +95 -0
- eval_studio_client/api/models/v1_info.py +10 -4
- eval_studio_client/api/models/v1_init_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_insight.py +1 -1
- eval_studio_client/api/models/v1_labeled_test_case.py +91 -0
- eval_studio_client/api/models/v1_leaderboard.py +1 -1
- eval_studio_client/api/models/v1_leaderboard_report.py +115 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_data.py +93 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +101 -0
- eval_studio_client/api/models/v1_leaderboard_report_evaluator.py +155 -0
- eval_studio_client/api/models/v1_leaderboard_report_evaluator_parameter.py +109 -0
- eval_studio_client/api/models/v1_leaderboard_report_explanation.py +103 -0
- eval_studio_client/api/models/v1_leaderboard_report_metrics_meta_entry.py +129 -0
- eval_studio_client/api/models/v1_leaderboard_report_model.py +113 -0
- eval_studio_client/api/models/v1_leaderboard_report_result.py +175 -0
- eval_studio_client/api/models/v1_leaderboard_report_result_relationship.py +97 -0
- eval_studio_client/api/models/v1_leaderboard_status.py +1 -1
- eval_studio_client/api/models/v1_leaderboard_type.py +1 -1
- eval_studio_client/api/models/v1_leaderboard_view.py +1 -1
- eval_studio_client/api/models/v1_list_base_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_documents_response.py +1 -1
- eval_studio_client/api/models/v1_list_evaluators_response.py +1 -1
- eval_studio_client/api/models/v1_list_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_llm_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_model_collections_response.py +1 -1
- eval_studio_client/api/models/v1_list_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_dashboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_leaderboards_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_models_response.py +1 -1
- eval_studio_client/api/models/v1_list_most_recent_tests_response.py +1 -1
- eval_studio_client/api/models/v1_list_operations_response.py +1 -1
- eval_studio_client/api/models/v1_list_perturbators_response.py +1 -1
- eval_studio_client/api/models/v1_list_prompt_library_items_response.py +95 -0
- eval_studio_client/api/models/v1_list_rag_collections_response.py +1 -1
- eval_studio_client/api/models/v1_list_test_case_library_items_response.py +95 -0
- eval_studio_client/api/models/v1_list_test_case_relationships_response.py +95 -0
- eval_studio_client/api/models/v1_list_test_cases_response.py +1 -1
- eval_studio_client/api/models/v1_list_test_classes_response.py +1 -1
- eval_studio_client/api/models/v1_list_tests_response.py +1 -1
- eval_studio_client/api/models/v1_list_workflow_dependencies_response.py +105 -0
- eval_studio_client/api/models/v1_list_workflows_response.py +95 -0
- eval_studio_client/api/models/v1_metric_score.py +89 -0
- eval_studio_client/api/models/v1_metric_scores.py +95 -0
- eval_studio_client/api/models/v1_model.py +1 -1
- eval_studio_client/api/models/v1_model_type.py +1 -1
- eval_studio_client/api/models/v1_operation.py +1 -1
- eval_studio_client/api/models/v1_operation_progress.py +1 -1
- eval_studio_client/api/models/v1_perturb_test_in_place_response.py +91 -0
- eval_studio_client/api/models/v1_perturb_test_response.py +1 -1
- eval_studio_client/api/models/v1_perturbator.py +1 -1
- eval_studio_client/api/models/v1_perturbator_configuration.py +1 -1
- eval_studio_client/api/models/v1_perturbator_intensity.py +1 -1
- eval_studio_client/api/models/v1_problem_and_action.py +1 -1
- eval_studio_client/api/models/v1_process_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_prompt_library_item.py +129 -0
- eval_studio_client/api/models/v1_repeated_context.py +95 -0
- eval_studio_client/api/models/v1_repeated_string.py +87 -0
- eval_studio_client/api/models/v1_reset_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_test.py +6 -3
- eval_studio_client/api/models/v1_test_case.py +14 -3
- eval_studio_client/api/models/v1_test_case_relationship.py +1 -1
- eval_studio_client/api/models/v1_test_cases_generator.py +1 -1
- eval_studio_client/api/models/v1_test_class.py +1 -1
- eval_studio_client/api/models/v1_test_class_type.py +1 -1
- eval_studio_client/api/models/v1_test_lab.py +1 -1
- eval_studio_client/api/models/v1_test_suite_evaluates.py +39 -0
- eval_studio_client/api/models/v1_test_type.py +38 -0
- eval_studio_client/api/models/v1_update_dashboard_response.py +1 -1
- eval_studio_client/api/models/v1_update_document_response.py +1 -1
- eval_studio_client/api/models/v1_update_leaderboard_response.py +1 -1
- eval_studio_client/api/models/v1_update_model_response.py +1 -1
- eval_studio_client/api/models/v1_update_operation_response.py +1 -1
- eval_studio_client/api/models/v1_update_test_case_response.py +1 -1
- eval_studio_client/api/models/v1_update_test_response.py +1 -1
- eval_studio_client/api/models/v1_update_workflow_node_response.py +91 -0
- eval_studio_client/api/models/v1_update_workflow_response.py +91 -0
- eval_studio_client/api/models/v1_who_am_i_response.py +1 -1
- eval_studio_client/api/models/v1_workflow.py +164 -0
- eval_studio_client/api/models/v1_workflow_dependency.py +89 -0
- eval_studio_client/api/models/v1_workflow_edge.py +123 -0
- eval_studio_client/api/models/v1_workflow_edge_type.py +38 -0
- eval_studio_client/api/models/v1_workflow_node.py +156 -0
- eval_studio_client/api/models/v1_workflow_node_artifact.py +126 -0
- eval_studio_client/api/models/v1_workflow_node_artifacts.py +97 -0
- eval_studio_client/api/models/v1_workflow_node_attributes.py +87 -0
- eval_studio_client/api/models/v1_workflow_node_status.py +40 -0
- eval_studio_client/api/models/v1_workflow_node_type.py +44 -0
- eval_studio_client/api/models/v1_workflow_node_view.py +38 -0
- eval_studio_client/api/models/v1_workflow_type.py +37 -0
- eval_studio_client/api/models/workflow_service_clone_workflow_request.py +95 -0
- eval_studio_client/api/rest.py +1 -1
- eval_studio_client/api/test/test_adversarial_inputs_service_api.py +37 -0
- eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +128 -0
- eval_studio_client/api/test/test_dashboard_service_api.py +1 -1
- eval_studio_client/api/test/test_document_service_api.py +1 -1
- eval_studio_client/api/test/test_evaluation_service_api.py +1 -1
- eval_studio_client/api/test/test_evaluator_service_api.py +1 -1
- eval_studio_client/api/test/test_human_calibration_service_api.py +38 -0
- eval_studio_client/api/test/test_info_service_api.py +1 -1
- eval_studio_client/api/test/test_leaderboard_report_service_api.py +37 -0
- eval_studio_client/api/test/test_leaderboard_service_api.py +1 -1
- eval_studio_client/api/test/test_model_service_api.py +1 -1
- eval_studio_client/api/test/test_operation_progress_service_api.py +1 -1
- eval_studio_client/api/test/test_operation_service_api.py +7 -1
- eval_studio_client/api/test/test_perturbation_service_api.py +1 -1
- eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +25 -3
- eval_studio_client/api/test/test_perturbator_service_api.py +1 -1
- eval_studio_client/api/test/test_prompt_generation_service_api.py +1 -1
- eval_studio_client/api/test/test_prompt_generation_service_auto_generate_prompts_request.py +21 -5
- eval_studio_client/api/test/test_prompt_library_service_api.py +43 -0
- eval_studio_client/api/test/test_protobuf_any.py +1 -1
- eval_studio_client/api/test/test_protobuf_null_value.py +33 -0
- eval_studio_client/api/test/test_required_the_dashboard_to_update.py +3 -2
- eval_studio_client/api/test/test_required_the_document_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_model_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_operation_to_finalize.py +1 -1
- eval_studio_client/api/test/test_required_the_operation_to_update.py +1 -1
- eval_studio_client/api/test/test_required_the_test_case_to_update.py +9 -2
- eval_studio_client/api/test/test_required_the_test_to_update.py +3 -2
- eval_studio_client/api/test/test_required_the_updated_workflow.py +92 -0
- eval_studio_client/api/test/test_required_the_updated_workflow_node.py +81 -0
- eval_studio_client/api/test/test_rpc_status.py +1 -1
- eval_studio_client/api/test/test_test_case_relationship_service_api.py +37 -0
- eval_studio_client/api/test/test_test_case_service_api.py +1 -1
- eval_studio_client/api/test/test_test_case_service_batch_delete_test_cases_request.py +1 -1
- eval_studio_client/api/test/test_test_class_service_api.py +1 -1
- eval_studio_client/api/test/test_test_lab_service_api.py +1 -1
- eval_studio_client/api/test/test_test_service_api.py +25 -1
- eval_studio_client/api/test/test_test_service_clone_test_request.py +52 -0
- eval_studio_client/api/test/test_test_service_generate_test_cases_request.py +17 -2
- eval_studio_client/api/test/test_test_service_import_test_cases_from_library_request.py +56 -0
- eval_studio_client/api/test/test_test_service_list_test_case_library_items_request.py +63 -0
- eval_studio_client/api/test/test_test_service_perturb_test_in_place_request.py +59 -0
- eval_studio_client/api/test/test_test_service_perturb_test_request.py +5 -2
- eval_studio_client/api/test/test_v1_abort_operation_response.py +71 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_dashboards_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_delete_documents_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_documents_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_evaluators_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_evaluators_response.py +4 -2
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_models_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_test_cases_response.py +9 -2
- eval_studio_client/api/test/test_v1_batch_delete_tests_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_delete_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_delete_workflows_request.py +53 -0
- eval_studio_client/api/test/test_v1_batch_delete_workflows_response.py +95 -0
- eval_studio_client/api/test/test_v1_batch_get_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_get_documents_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_operations_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_get_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_batch_get_workflow_edges_response.py +64 -0
- eval_studio_client/api/test/test_v1_batch_get_workflow_nodes_response.py +84 -0
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_batch_import_tests_request.py +1 -1
- eval_studio_client/api/test/test_v1_batch_import_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_check_base_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_clone_test_response.py +68 -0
- eval_studio_client/api/test/test_v1_clone_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_collection_info.py +1 -1
- eval_studio_client/api/test/test_v1_context.py +59 -0
- eval_studio_client/api/test/test_v1_create_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_create_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_evaluation_request.py +25 -3
- eval_studio_client/api/test/test_v1_create_evaluator_response.py +4 -2
- eval_studio_client/api/test/test_v1_create_leaderboard_request.py +1 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_perturbation_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_create_test_lab_response.py +1 -1
- eval_studio_client/api/test/test_v1_create_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_create_workflow_edge_response.py +62 -0
- eval_studio_client/api/test/test_v1_create_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_create_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_dashboard.py +3 -2
- eval_studio_client/api/test/test_v1_dashboard_status.py +1 -1
- eval_studio_client/api/test/test_v1_dashboard_type.py +33 -0
- eval_studio_client/api/test/test_v1_delete_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_delete_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_delete_evaluator_response.py +4 -2
- eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_delete_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_delete_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_delete_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_delete_workflow_edge_response.py +62 -0
- eval_studio_client/api/test/test_v1_delete_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_delete_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_dependency_list.py +56 -0
- eval_studio_client/api/test/test_v1_document.py +1 -1
- eval_studio_client/api/test/test_v1_estimate_threshold_request.py +60 -0
- eval_studio_client/api/test/test_v1_evaluation_test.py +9 -2
- eval_studio_client/api/test/test_v1_evaluator.py +4 -2
- eval_studio_client/api/test/test_v1_evaluator_param_type.py +1 -1
- eval_studio_client/api/test/test_v1_evaluator_parameter.py +1 -1
- eval_studio_client/api/test/test_v1_evaluator_view.py +1 -1
- eval_studio_client/api/test/test_v1_finalize_operation_response.py +1 -1
- eval_studio_client/api/test/test_v1_find_all_test_cases_by_id_response.py +9 -2
- eval_studio_client/api/test/test_v1_find_test_lab_response.py +1 -1
- eval_studio_client/api/test/test_v1_generate_test_cases_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_get_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_evaluator_response.py +4 -2
- eval_studio_client/api/test/test_v1_get_guardrails_configuration_response.py +51 -0
- eval_studio_client/api/test/test_v1_get_info_response.py +7 -2
- eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +173 -0
- eval_studio_client/api/test/test_v1_get_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_operation_progress_by_parent_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_operation_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_perturbator_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_get_test_class_response.py +1 -1
- eval_studio_client/api/test/test_v1_get_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_get_workflow_node_prerequisites_response.py +56 -0
- eval_studio_client/api/test/test_v1_get_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_get_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_import_evaluation_request.py +17 -2
- eval_studio_client/api/test/test_v1_import_leaderboard_request.py +1 -1
- eval_studio_client/api/test/test_v1_import_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +71 -0
- eval_studio_client/api/test/test_v1_import_test_cases_request.py +57 -0
- eval_studio_client/api/test/test_v1_info.py +7 -2
- eval_studio_client/api/test/test_v1_init_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_insight.py +1 -1
- eval_studio_client/api/test/test_v1_labeled_test_case.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard.py +1 -1
- eval_studio_client/api/test/test_v1_leaderboard_report.py +172 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_data.py +52 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +56 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_evaluator.py +114 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_evaluator_parameter.py +63 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_explanation.py +58 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_metrics_meta_entry.py +66 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_model.py +60 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_result.py +92 -0
- eval_studio_client/api/test/test_v1_leaderboard_report_result_relationship.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard_status.py +1 -1
- eval_studio_client/api/test/test_v1_leaderboard_type.py +1 -1
- eval_studio_client/api/test/test_v1_leaderboard_view.py +1 -1
- eval_studio_client/api/test/test_v1_list_base_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_documents_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_evaluators_response.py +4 -2
- eval_studio_client/api/test/test_v1_list_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_llm_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_model_collections_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_most_recent_dashboards_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_most_recent_models_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_most_recent_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_operations_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_perturbators_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_prompt_library_items_response.py +71 -0
- eval_studio_client/api/test/test_v1_list_rag_collections_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_test_case_library_items_response.py +71 -0
- eval_studio_client/api/test/test_v1_list_test_case_relationships_response.py +56 -0
- eval_studio_client/api/test/test_v1_list_test_cases_response.py +9 -2
- eval_studio_client/api/test/test_v1_list_test_classes_response.py +1 -1
- eval_studio_client/api/test/test_v1_list_tests_response.py +3 -2
- eval_studio_client/api/test/test_v1_list_workflow_dependencies_response.py +93 -0
- eval_studio_client/api/test/test_v1_list_workflows_response.py +95 -0
- eval_studio_client/api/test/test_v1_metric_score.py +52 -0
- eval_studio_client/api/test/test_v1_metric_scores.py +55 -0
- eval_studio_client/api/test/test_v1_model.py +1 -1
- eval_studio_client/api/test/test_v1_model_type.py +1 -1
- eval_studio_client/api/test/test_v1_operation.py +1 -1
- eval_studio_client/api/test/test_v1_operation_progress.py +1 -1
- eval_studio_client/api/test/test_v1_perturb_test_in_place_response.py +68 -0
- eval_studio_client/api/test/test_v1_perturb_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_perturbator.py +1 -1
- eval_studio_client/api/test/test_v1_perturbator_configuration.py +1 -1
- eval_studio_client/api/test/test_v1_perturbator_intensity.py +1 -1
- eval_studio_client/api/test/test_v1_problem_and_action.py +1 -1
- eval_studio_client/api/test/test_v1_process_workflow_node_response.py +71 -0
- eval_studio_client/api/test/test_v1_prompt_library_item.py +68 -0
- eval_studio_client/api/test/test_v1_repeated_context.py +62 -0
- eval_studio_client/api/test/test_v1_repeated_string.py +53 -0
- eval_studio_client/api/test/test_v1_reset_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_test.py +3 -2
- eval_studio_client/api/test/test_v1_test_case.py +9 -2
- eval_studio_client/api/test/test_v1_test_case_relationship.py +1 -1
- eval_studio_client/api/test/test_v1_test_cases_generator.py +1 -1
- eval_studio_client/api/test/test_v1_test_class.py +1 -1
- eval_studio_client/api/test/test_v1_test_class_type.py +1 -1
- eval_studio_client/api/test/test_v1_test_lab.py +1 -1
- eval_studio_client/api/test/test_v1_test_suite_evaluates.py +33 -0
- eval_studio_client/api/test/test_v1_test_type.py +33 -0
- eval_studio_client/api/test/test_v1_update_dashboard_response.py +3 -2
- eval_studio_client/api/test/test_v1_update_document_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_leaderboard_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_model_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_operation_response.py +1 -1
- eval_studio_client/api/test/test_v1_update_test_case_response.py +9 -2
- eval_studio_client/api/test/test_v1_update_test_response.py +3 -2
- eval_studio_client/api/test/test_v1_update_workflow_node_response.py +82 -0
- eval_studio_client/api/test/test_v1_update_workflow_response.py +93 -0
- eval_studio_client/api/test/test_v1_who_am_i_response.py +1 -1
- eval_studio_client/api/test/test_v1_workflow.py +93 -0
- eval_studio_client/api/test/test_v1_workflow_dependency.py +52 -0
- eval_studio_client/api/test/test_v1_workflow_edge.py +61 -0
- eval_studio_client/api/test/test_v1_workflow_edge_type.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node.py +82 -0
- eval_studio_client/api/test/test_v1_workflow_node_artifact.py +62 -0
- eval_studio_client/api/test/test_v1_workflow_node_artifacts.py +65 -0
- eval_studio_client/api/test/test_v1_workflow_node_attributes.py +51 -0
- eval_studio_client/api/test/test_v1_workflow_node_status.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node_type.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_node_view.py +33 -0
- eval_studio_client/api/test/test_v1_workflow_type.py +33 -0
- eval_studio_client/api/test/test_who_am_i_service_api.py +1 -1
- eval_studio_client/api/test/test_workflow_edge_service_api.py +52 -0
- eval_studio_client/api/test/test_workflow_node_service_api.py +94 -0
- eval_studio_client/api/test/test_workflow_service_api.py +93 -0
- eval_studio_client/api/test/test_workflow_service_clone_workflow_request.py +55 -0
- eval_studio_client/client.py +7 -0
- eval_studio_client/dashboards.py +29 -0
- eval_studio_client/gen/openapiv2/eval_studio.swagger.json +5318 -1884
- eval_studio_client/leaderboards.py +123 -0
- eval_studio_client/models.py +3 -42
- eval_studio_client/test_labs.py +49 -21
- eval_studio_client/tests.py +290 -8
- {eval_studio_client-1.0.3.dist-info → eval_studio_client-1.1.0.dist-info}/METADATA +1 -2
- eval_studio_client-1.1.0.dist-info/RECORD +732 -0
- eval_studio_client-1.0.3.dist-info/RECORD +0 -486
- {eval_studio_client-1.0.3.dist-info → eval_studio_client-1.1.0.dist-info}/WHEEL +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import datetime
|
|
3
3
|
import json
|
|
4
|
+
import os
|
|
4
5
|
import time
|
|
5
6
|
from typing import Dict
|
|
6
7
|
from typing import List
|
|
@@ -33,6 +34,7 @@ class Leaderboard:
|
|
|
33
34
|
update_time: Optional[datetime.datetime] = None
|
|
34
35
|
problems: List[p6s.Problem] = dataclasses.field(default_factory=list)
|
|
35
36
|
insights: List[i6s.Insight] = dataclasses.field(default_factory=list)
|
|
37
|
+
summary: Optional[str] = None
|
|
36
38
|
existing_collection: Optional[str] = None
|
|
37
39
|
_report: Optional[str] = None
|
|
38
40
|
_leaderboard: Optional[str] = None
|
|
@@ -86,6 +88,42 @@ class Leaderboard:
|
|
|
86
88
|
if self._client:
|
|
87
89
|
self._leaderboard_api.leaderboard_service_delete_leaderboard(self.key)
|
|
88
90
|
|
|
91
|
+
def download_result(self, dest: str):
    """Downloads the leaderboard result to a JSON file.

    Args:
        dest (str): The destination path for the result. It may be an
            existing directory (the result is then stored in it as
            ``results.json``), an existing file (it is overwritten), or a
            not-yet-existing file located in an existing directory.

    Raises:
        ValueError: If neither ``dest`` nor its parent directory exists, or
            if the client is not set or the leaderboard is not finished.
        RuntimeError: If the server answers with a status other than 200.
    """
    if os.path.isdir(dest):
        dest = os.path.join(dest, "results.json")
    elif not os.path.exists(dest):
        # A new file is a valid target as long as the directory that is
        # supposed to contain it already exists; requiring the file itself
        # to exist would force callers to pre-create it.
        parent_dir = os.path.dirname(os.path.abspath(dest))
        if not dest or not os.path.isdir(parent_dir):
            raise ValueError("Destination path does not exist.")

    if not (self._client and self.finished):
        raise ValueError("Cannot download result for unfinished leaderboard.")

    headers: Dict[str, str] = {}
    url = urljoin(self._client.configuration.host, f"/content/{self.key}/results")
    # Presumably fills `headers` in place with the client's auth settings
    # before the raw HTTP request is made - confirm against the ApiClient.
    self._client.update_params_for_auth(
        headers=headers,
        queries=[],
        auth_settings=[],
        resource_path=url,
        method="GET",
        body=None,
    )
    response = urllib3.request("GET", url, headers=headers)

    if response.status != 200:
        raise RuntimeError("Failed to retrieve leaderboard result.")

    # The payload is written verbatim as bytes, no re-encoding is done.
    with open(dest, "wb") as f:
        f.write(response.data)
|
|
126
|
+
|
|
89
127
|
def download_report(self, dest: str):
|
|
90
128
|
"""Downloads the leaderboard report to a zip file.
|
|
91
129
|
|
|
@@ -114,6 +152,30 @@ class Leaderboard:
|
|
|
114
152
|
|
|
115
153
|
raise ValueError("Cannot download report for unfinished leaderboard.")
|
|
116
154
|
|
|
155
|
+
def get_result_json(self) -> str:
    """Retrieves the leaderboard result as a JSON string.

    Returns:
        str: The body of the ``/content/<key>/results`` response decoded
            as UTF-8 text.

    Raises:
        ValueError: If the client is not set or the leaderboard is not
            finished.
        RuntimeError: If the server answers with a status other than 200.
    """
    if not (self._client and self.finished):
        raise ValueError("Cannot download result for unfinished leaderboard.")

    headers: Dict[str, str] = {}
    url = urljoin(self._client.configuration.host, f"/content/{self.key}/results")
    # Presumably fills `headers` in place with the client's auth settings
    # before the raw HTTP request is made - confirm against the ApiClient.
    self._client.update_params_for_auth(
        headers=headers,
        queries=[],
        auth_settings=[],
        resource_path=url,
        method="GET",
        body=None,
    )
    response = urllib3.request("GET", url, headers=headers)

    if response.status != 200:
        raise RuntimeError("Failed to retrieve leaderboard result.")

    # `response.data` is a bytes object; calling str() on it would return
    # its repr ("b'...'") instead of the JSON text, so decode it explicitly.
    return response.data.decode("utf-8")
|
|
178
|
+
|
|
117
179
|
def get_table(self) -> LeaderboardTable:
|
|
118
180
|
"""Retrieves the leaderboard table."""
|
|
119
181
|
if self._client and self.finished:
|
|
@@ -170,6 +232,7 @@ class Leaderboard:
|
|
|
170
232
|
"""Refresh the leaderboard with the latest API data."""
|
|
171
233
|
self.key = api_leaderboard.name or ""
|
|
172
234
|
self.update_time = api_leaderboard.update_time
|
|
235
|
+
self.summary = api_leaderboard.leaderboard_summary
|
|
173
236
|
self._leaderboard = api_leaderboard.leaderboard_table
|
|
174
237
|
self._report = api_leaderboard.leaderboard_report or ""
|
|
175
238
|
self._status = api_leaderboard.status
|
|
@@ -192,6 +255,7 @@ class Leaderboard:
|
|
|
192
255
|
update_time=api_leaderboard.update_time,
|
|
193
256
|
problems=problems,
|
|
194
257
|
insights=insights,
|
|
258
|
+
summary=api_leaderboard.leaderboard_summary,
|
|
195
259
|
existing_collection=api_leaderboard.h2ogpte_collection or None,
|
|
196
260
|
_evaluator_name=api_leaderboard.evaluator or "",
|
|
197
261
|
_test_names=api_leaderboard.tests or [],
|
|
@@ -208,3 +272,62 @@ class Leaderboard:
|
|
|
208
272
|
models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
|
|
209
273
|
models.V1LeaderboardStatus.LEADERBOARD_STATUS_FAILED,
|
|
210
274
|
]
|
|
275
|
+
|
|
276
|
+
@staticmethod
def from_operation(
    operation: models.V1Operation, client: Optional[api.ApiClient]
) -> Optional["Leaderboard"]:
    """Retrieves the leaderboard from the operation, which created it.

    Args:
        operation: The operation that created the leaderboard.
        client: The API client to use for the leaderboard retrieval.

    Returns:
        Leaderboard: The leaderboard instance created by the operation, or
            `None` if the API response does not contain a leaderboard.

    Raises:
        RuntimeError: If the client is not provided, the operation has no
            metadata, or the metadata does not reference a leaderboard.
    """
    if not client:
        raise RuntimeError("API Client is not provided")

    if not operation.metadata:
        raise RuntimeError(
            "Operation metadata missing, it's not possible to retrieve leaderboard from operation"
        )

    leaderboard_id = operation.metadata.to_dict().get("leaderboard")
    if not leaderboard_id:
        # Without this guard an empty resource name would be sent to the
        # API, which hides the real cause of the failure.
        raise RuntimeError(
            "Operation metadata does not reference a leaderboard, it's not possible to retrieve leaderboard from operation"
        )

    leaderboard_api = api.LeaderboardServiceApi(client)
    res = leaderboard_api.leaderboard_service_get_leaderboard(str(leaderboard_id))
    if res and res.leaderboard:
        return Leaderboard._from_api_leaderboard(res.leaderboard, client)

    return None
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class _Leaderboards:
    """Entry point for fetching leaderboards through the leaderboard service API."""

    def __init__(self, client: api.ApiClient):
        # The raw client is kept as well, it is handed over to every
        # Leaderboard instance created below.
        self._api = api.LeaderboardServiceApi(client)
        self._client = client

    def get(self, key: str) -> Leaderboard:
        """Fetches a single leaderboard identified by its resource name.

        Args:
            key: The leaderboard resource name to retrieve.

        Returns:
            Leaderboard: The client-side leaderboard built from the API response.

        Raises:
            KeyError: If the response is empty or carries no leaderboard.
        """
        response = self._api.leaderboard_service_get_leaderboard(key)
        if not (response and response.leaderboard):
            raise KeyError("Leaderboard not found.")

        return Leaderboard._from_api_leaderboard(response.leaderboard, self._client)

    def list(self) -> List[Leaderboard]:
        """Returns the leaderboards reported by the list endpoint.

        An empty list is returned when the response is empty or contains
        no leaderboards.
        """
        response = self._api.leaderboard_service_list_leaderboards()
        if not response:
            return []

        converted = []
        for api_leaderboard in response.leaderboards or []:
            converted.append(
                Leaderboard._from_api_leaderboard(api_leaderboard, self._client)
            )
        return converted
|
eval_studio_client/models.py
CHANGED
|
@@ -168,7 +168,7 @@ class Model:
|
|
|
168
168
|
)
|
|
169
169
|
|
|
170
170
|
if res and res.operation:
|
|
171
|
-
return
|
|
171
|
+
return l10s.Leaderboard.from_operation(res.operation, self._client)
|
|
172
172
|
|
|
173
173
|
return None
|
|
174
174
|
|
|
@@ -226,7 +226,7 @@ class Model:
|
|
|
226
226
|
)
|
|
227
227
|
|
|
228
228
|
if res and res.operation:
|
|
229
|
-
return
|
|
229
|
+
return d8s.Dashboard.from_operation(res.operation, self._client)
|
|
230
230
|
|
|
231
231
|
return None
|
|
232
232
|
|
|
@@ -257,7 +257,7 @@ class Model:
|
|
|
257
257
|
)
|
|
258
258
|
res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
|
|
259
259
|
if res and res.operation:
|
|
260
|
-
return
|
|
260
|
+
return l10s.Leaderboard.from_operation(res.operation, self._client)
|
|
261
261
|
|
|
262
262
|
return None
|
|
263
263
|
|
|
@@ -273,45 +273,6 @@ class Model:
|
|
|
273
273
|
|
|
274
274
|
raise RuntimeError("Failed to list base models")
|
|
275
275
|
|
|
276
|
-
def _get_leaderboard_from_operation(
|
|
277
|
-
self, operation: models.V1Operation
|
|
278
|
-
) -> Optional[l10s.Leaderboard]:
|
|
279
|
-
"""Retrieves the leaderboard from the operation, which created it.
|
|
280
|
-
|
|
281
|
-
Args:
|
|
282
|
-
operation: The operation that created the leaderboard.
|
|
283
|
-
"""
|
|
284
|
-
if not operation.metadata:
|
|
285
|
-
raise RuntimeError("Not possible to retrieve leaderboard from operation")
|
|
286
|
-
|
|
287
|
-
leadeboard_id = operation.metadata.to_dict().get("leaderboard")
|
|
288
|
-
res = self._leaderboard_api.leaderboard_service_get_leaderboard(leadeboard_id)
|
|
289
|
-
if res and res.leaderboard:
|
|
290
|
-
return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
|
|
291
|
-
|
|
292
|
-
return None
|
|
293
|
-
|
|
294
|
-
def _get_dashboard_from_operation(
|
|
295
|
-
self, operation: models.V1Operation
|
|
296
|
-
) -> Optional[d8s.Dashboard]:
|
|
297
|
-
"""Retrieves the dashboard from the operation, which created it.
|
|
298
|
-
|
|
299
|
-
Args:
|
|
300
|
-
operation: The operation that created the dashboard.
|
|
301
|
-
"""
|
|
302
|
-
if not self._client:
|
|
303
|
-
raise RuntimeError("Client is not set.")
|
|
304
|
-
|
|
305
|
-
if not operation.metadata:
|
|
306
|
-
raise RuntimeError("Not possible to retrieve dashboard from operation")
|
|
307
|
-
|
|
308
|
-
dashboard_id = operation.metadata.to_dict().get("dashboard")
|
|
309
|
-
res = self._dashboard_api.dashboard_service_get_dashboard(dashboard_id)
|
|
310
|
-
if res and res.dashboard:
|
|
311
|
-
return d8s.Dashboard._from_api_dashboard(res.dashboard, self._client)
|
|
312
|
-
|
|
313
|
-
return None
|
|
314
|
-
|
|
315
276
|
@staticmethod
|
|
316
277
|
def _from_api_model(api_model: models.V1Model, client: api.ApiClient) -> "Model":
|
|
317
278
|
"""Converts the API model to the client model."""
|
eval_studio_client/test_labs.py
CHANGED
|
@@ -7,7 +7,8 @@ from typing import Union
|
|
|
7
7
|
import uuid
|
|
8
8
|
|
|
9
9
|
from eval_studio_client import api
|
|
10
|
-
from eval_studio_client import
|
|
10
|
+
from eval_studio_client import dashboards
|
|
11
|
+
from eval_studio_client import evaluators as e8s
|
|
11
12
|
from eval_studio_client import leaderboards as l10s
|
|
12
13
|
from eval_studio_client.api import models as apiModels
|
|
13
14
|
|
|
@@ -92,11 +93,56 @@ class TestLab:
|
|
|
92
93
|
self._models.append(_m)
|
|
93
94
|
return _m
|
|
94
95
|
|
|
95
|
-
def evaluate(
|
|
96
|
+
def evaluate(
    self,
    evaluators: Union[e8s.Evaluator, List[e8s.Evaluator]],
    name: Optional[str] = None,
    description: Optional[str] = None,
) -> Optional[dashboards.Dashboard]:
    """Launches an evaluation of the test lab with the given evaluators.

    Args:
        evaluators (Union[e8s.Evaluator, List[e8s.Evaluator]]): A single evaluator
            or a list of evaluators used to evaluate the test lab.
        name (str, optional): Name for the evaluation. Falls back to the test lab
            name and then to "Imported Dashboard".
        description (str, optional): Description for the evaluation. Falls back to
            the test lab description and then to an empty string.

    Returns:
        Dashboard: The dashboard obtained from the operation returned by the batch
            import call. `None` is returned when the response is empty or
            contains no operation.
    """
    if isinstance(evaluators, e8s.Evaluator):
        evaluator_list = [evaluators]
    else:
        evaluator_list = evaluators

    display_name = name or self.name or "Imported Dashboard"
    display_description = description or self.description or ""

    lab_json = self.json()
    evaluator_keys = [evaluator.key for evaluator in evaluator_list]
    request = apiModels.V1BatchImportLeaderboardRequest(
        testLabJson=lab_json,
        evaluators=evaluator_keys,
        model=None,
        dashboardDisplayName=display_name,
        dashboardDescription=display_description,
        testDisplayName=f"{display_name} - Test",
        testDescription=f"Test suite for {display_description}",
    )
    response = self._leaderboard_api.leaderboard_service_batch_import_leaderboard(
        request
    )

    if not (response and response.operation):
        return None

    return dashboards.Dashboard.from_operation(response.operation, self._client)
|
|
134
|
+
|
|
135
|
+
def create_leaderboard(
|
|
136
|
+
self, evaluator: e8s.Evaluator
|
|
137
|
+
) -> Optional[l10s.Leaderboard]:
|
|
138
|
+
"""Creates a single leaderboard for the test lab.
|
|
139
|
+
|
|
98
140
|
Args:
|
|
99
141
|
evaluator: The evaluator to use for the evaluation.
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
Leaderboard: Single evaluation leaderboard instance.
|
|
145
|
+
In case launching of evaluation fails, `None` is returned.
|
|
100
146
|
"""
|
|
101
147
|
req = apiModels.V1ImportLeaderboardRequest(
|
|
102
148
|
testLabJson=self.json(),
|
|
@@ -109,7 +155,7 @@ class TestLab:
|
|
|
109
155
|
)
|
|
110
156
|
res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
|
|
111
157
|
if res and res.operation:
|
|
112
|
-
return
|
|
158
|
+
return l10s.Leaderboard.from_operation(res.operation, self._client)
|
|
113
159
|
|
|
114
160
|
return None
|
|
115
161
|
|
|
@@ -131,24 +177,6 @@ class TestLab:
|
|
|
131
177
|
|
|
132
178
|
return json.dumps(lab, indent=4, sort_keys=True)
|
|
133
179
|
|
|
134
|
-
def _get_leaderboard_from_operation(
|
|
135
|
-
self, operation: apiModels.V1Operation
|
|
136
|
-
) -> Optional[l10s.Leaderboard]:
|
|
137
|
-
"""Retrieves the leaderboard from the operation, which created it.
|
|
138
|
-
|
|
139
|
-
Args:
|
|
140
|
-
operation: The operation that created the leaderboard.
|
|
141
|
-
"""
|
|
142
|
-
if not operation.metadata:
|
|
143
|
-
raise RuntimeError("Not possible to retrieve leaderboard from operation")
|
|
144
|
-
|
|
145
|
-
leadeboard_id = operation.metadata.to_dict().get("leaderboard")
|
|
146
|
-
res = self._leaderboard_api.leaderboard_service_get_leaderboard(leadeboard_id)
|
|
147
|
-
if res and res.leaderboard:
|
|
148
|
-
return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
|
|
149
|
-
|
|
150
|
-
return None
|
|
151
|
-
|
|
152
180
|
def _llm_model_names(self) -> List[str]:
|
|
153
181
|
return [m.llm_model_name for m in self.models]
|
|
154
182
|
|