eval-studio-client 1.2.4a2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their public registries.
- eval_studio_client/api/__init__.py +65 -0
- eval_studio_client/api/api/__init__.py +3 -0
- eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
- eval_studio_client/api/api/dashboard_service_api.py +16 -16
- eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/document_service_api.py +16 -16
- eval_studio_client/api/api/evaluation_service_api.py +12 -12
- eval_studio_client/api/api/evaluator_service_api.py +16 -16
- eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
- eval_studio_client/api/api/leaderboard_service_api.py +554 -16
- eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/model_service_api.py +16 -16
- eval_studio_client/api/api/operation_service_api.py +821 -17
- eval_studio_client/api/api/perturbator_service_api.py +22 -22
- eval_studio_client/api/api/test_case_service_api.py +300 -16
- eval_studio_client/api/api/test_class_service_api.py +16 -16
- eval_studio_client/api/api/test_service_api.py +285 -16
- eval_studio_client/api/api/workflow_node_service_api.py +16 -16
- eval_studio_client/api/api/workflow_service_api.py +16 -16
- eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
- eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
- eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
- eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
- eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
- eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
- eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
- eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
- eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/ModelServiceApi.md +5 -5
- eval_studio_client/api/docs/OperationServiceApi.md +215 -8
- eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
- eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
- eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
- eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
- eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
- eval_studio_client/api/docs/TestServiceApi.md +73 -5
- eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
- eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
- eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
- eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
- eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
- eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
- eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
- eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
- eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1DashboardReport.md +31 -0
- eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
- eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1DataFragment.md +31 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
- eval_studio_client/api/docs/V1DiffItem.md +36 -0
- eval_studio_client/api/docs/V1EvaluationType.md +12 -0
- eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
- eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
- eval_studio_client/api/docs/V1HumanDecision.md +12 -0
- eval_studio_client/api/docs/V1Info.md +1 -0
- eval_studio_client/api/docs/V1Leaderboard.md +1 -0
- eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
- eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
- eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
- eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
- eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
- eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1Metric.md +30 -0
- eval_studio_client/api/docs/V1MetricAverage.md +36 -0
- eval_studio_client/api/docs/V1MetricMeta.md +40 -0
- eval_studio_client/api/docs/V1MetricScore.md +1 -1
- eval_studio_client/api/docs/V1MetricScores.md +1 -1
- eval_studio_client/api/docs/V1ModelType.md +1 -1
- eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
- eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
- eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
- eval_studio_client/api/docs/V1Operation.md +1 -0
- eval_studio_client/api/docs/V1OperationView.md +12 -0
- eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
- eval_studio_client/api/docs/V1Stats.md +2 -0
- eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
- eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
- eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
- eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
- eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
- eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
- eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
- eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
- eval_studio_client/api/models/__init__.py +62 -0
- eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
- eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
- eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
- eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
- eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
- eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
- eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
- eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
- eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
- eval_studio_client/api/models/v1_comparison_item.py +130 -0
- eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
- eval_studio_client/api/models/v1_comparison_result.py +120 -0
- eval_studio_client/api/models/v1_comparison_summary.py +91 -0
- eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
- eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
- eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
- eval_studio_client/api/models/v1_dashboard_report.py +109 -0
- eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
- eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_data_fragment.py +91 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
- eval_studio_client/api/models/v1_diff_item.py +137 -0
- eval_studio_client/api/models/v1_evaluation_type.py +39 -0
- eval_studio_client/api/models/v1_flipped_metric.py +91 -0
- eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
- eval_studio_client/api/models/v1_human_decision.py +38 -0
- eval_studio_client/api/models/v1_info.py +4 -2
- eval_studio_client/api/models/v1_leaderboard.py +5 -2
- eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
- eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
- eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
- eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
- eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
- eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_operations_response.py +5 -3
- eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
- eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
- eval_studio_client/api/models/v1_metric.py +89 -0
- eval_studio_client/api/models/v1_metric_average.py +101 -0
- eval_studio_client/api/models/v1_metric_meta.py +109 -0
- eval_studio_client/api/models/v1_metric_score.py +6 -1
- eval_studio_client/api/models/v1_metric_scores.py +1 -1
- eval_studio_client/api/models/v1_model_type.py +2 -1
- eval_studio_client/api/models/v1_models_comparisons.py +93 -0
- eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
- eval_studio_client/api/models/v1_models_overview.py +97 -0
- eval_studio_client/api/models/v1_operation.py +6 -2
- eval_studio_client/api/models/v1_operation_view.py +38 -0
- eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
- eval_studio_client/api/models/v1_stats.py +16 -2
- eval_studio_client/api/models/v1_technical_metrics.py +96 -0
- eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
- eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
- eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
- eval_studio_client/api/models/v1_test_case_result.py +157 -0
- eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
- eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
- eval_studio_client/api/models/v1_workflow_type.py +1 -0
- eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
- eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
- eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
- eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
- eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_operation_service_api.py +18 -0
- eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
- eval_studio_client/api/test/test_test_case_service_api.py +6 -0
- eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
- eval_studio_client/api/test/test_test_service_api.py +6 -0
- eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
- eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
- eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
- eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
- eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
- eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
- eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
- eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
- eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
- eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
- eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
- eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
- eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_diff_item.py +226 -0
- eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
- eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
- eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
- eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
- eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
- eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
- eval_studio_client/api/test/test_v1_human_decision.py +33 -0
- eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
- eval_studio_client/api/test/test_v1_info.py +4 -1
- eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
- eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
- eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
- eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
- eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
- eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
- eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
- eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
- eval_studio_client/api/test/test_v1_metric.py +52 -0
- eval_studio_client/api/test/test_v1_metric_average.py +58 -0
- eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
- eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
- eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
- eval_studio_client/api/test/test_v1_models_overview.py +60 -0
- eval_studio_client/api/test/test_v1_operation.py +2 -1
- eval_studio_client/api/test/test_v1_operation_view.py +33 -0
- eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
- eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
- eval_studio_client/api/test/test_v1_stats.py +3 -1
- eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
- eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
- eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
- eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
- eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
- eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
- eval_studio_client/models.py +18 -6
- {eval_studio_client-1.2.4a2.dist-info → eval_studio_client-1.3.0.dist-info}/METADATA +2 -2
- {eval_studio_client-1.2.4a2.dist-info → eval_studio_client-1.3.0.dist-info}/RECORD +306 -111
- {eval_studio_client-1.2.4a2.dist-info → eval_studio_client-1.3.0.dist-info}/WHEEL +0 -0
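Notable in 1.3.0 are the new test-case annotation services (dashboard and leaderboard), the leaderboard deep-compare endpoint, operation seen-by-creator tracking, and the test-case append/create-test APIs. A minimal sketch of driving the deep-compare endpoint through the regenerated client follows, assuming the standard OpenAPI-generator Python layout; the method name and host are assumptions inferred from the docs files above, not verified against the released wheel.

# Sketch only: exercising the new DeepCompareLeaderboards RPC via the
# regenerated client. Class and model names come from the files listed
# above; the method name follows the generator's usual <service>_<rpc>
# convention and is an assumption, as is the host.
from eval_studio_client.api import ApiClient, Configuration
from eval_studio_client.api.api import LeaderboardServiceApi
from eval_studio_client.api.models import V1DeepCompareLeaderboardsRequest

configuration = Configuration(host="https://eval-studio.example.com")  # hypothetical host

with ApiClient(configuration) as api_client:
    leaderboards = LeaderboardServiceApi(api_client)
    request = V1DeepCompareLeaderboardsRequest(
        leaderboard_baseline_name="leaderboards/baseline",  # hypothetical resource names
        leaderboard_current_name="leaderboards/current",
        text_similarity_metric="TEXT_SIMILARITY_METRIC_UNSPECIFIED",
    )
    # Assumed method name; check docs/LeaderboardServiceApi.md in the wheel.
    response = leaderboards.leaderboard_service_deep_compare_leaderboards(request)
    print(response.leaderboard_cmp_report.summary)

The request fields used here (leaderboard_baseline_name, leaderboard_current_name, text_similarity_metric) appear verbatim in the V1DeepCompareLeaderboardsRequest test stub further down this diff.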
--- /dev/null
+++ b/eval_studio_client/api/test/test_v1_dashboard_report_result.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_dashboard_report_result import V1DashboardReportResult
+
+class TestV1DashboardReportResult(unittest.TestCase):
+    """V1DashboardReportResult unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1DashboardReportResult:
+        """Test V1DashboardReportResult
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1DashboardReportResult`
+        """
+        model = V1DashboardReportResult()
+        if include_optional:
+            return V1DashboardReportResult(
+                key = '',
+                input = '',
+                expected_output = '',
+                actual_output = '',
+                model_key = '',
+                test_case_key = '',
+                metrics = {
+                    'key' : eval_studio_client.api.models.v1_metric_scores.v1MetricScores(
+                        scores = [
+                            eval_studio_client.api.models.v1_metric_score.v1MetricScore(
+                                key = '',
+                                value = 1.337, )
+                            ], )
+                    },
+                result_error_map = {
+                    'key' : ''
+                    },
+                human_decision = 'HUMAN_DECISION_UNSPECIFIED',
+                comment = '',
+                annotations = {
+                    'key' : None
+                    }
+            )
+        else:
+            return V1DashboardReportResult(
+        )
+        """
+
+    def testV1DashboardReportResult(self):
+        """Test V1DashboardReportResult"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
--- /dev/null
+++ b/eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_dashboard_test_case_annotation import V1DashboardTestCaseAnnotation
+
+class TestV1DashboardTestCaseAnnotation(unittest.TestCase):
+    """V1DashboardTestCaseAnnotation unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1DashboardTestCaseAnnotation:
+        """Test V1DashboardTestCaseAnnotation
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1DashboardTestCaseAnnotation`
+        """
+        model = V1DashboardTestCaseAnnotation()
+        if include_optional:
+            return V1DashboardTestCaseAnnotation(
+                name = '',
+                create_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                creator = '',
+                update_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                updater = '',
+                parent = '',
+                key = '',
+                value = None
+            )
+        else:
+            return V1DashboardTestCaseAnnotation(
+        )
+        """
+
+    def testV1DashboardTestCaseAnnotation(self):
+        """Test V1DashboardTestCaseAnnotation"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
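The annotation model exercised by the stub above backs the new DashboardTestCaseAnnotationService. As a sketch, it can be constructed directly with the fields the stub names (the stub itself ships with construction commented out); the resource name and key/value pair below are placeholders.

# Sketch only: building the annotation model with fields taken from the
# stub above. The stub's `value = None` suggests a free-form value; the
# parent resource name and the key/value pair here are placeholders.
from eval_studio_client.api.models.v1_dashboard_test_case_annotation import (
    V1DashboardTestCaseAnnotation,
)

annotation = V1DashboardTestCaseAnnotation(
    parent="dashboards/123",  # hypothetical resource name
    key="human-review",
    value={"reviewer": "alice", "approved": True},
)
print(annotation.to_json())  # generated pydantic models expose to_json()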
--- /dev/null
+++ b/eval_studio_client/api/test/test_v1_data_fragment.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_data_fragment import V1DataFragment
+
+class TestV1DataFragment(unittest.TestCase):
+    """V1DataFragment unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1DataFragment:
+        """Test V1DataFragment
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1DataFragment`
+        """
+        model = V1DataFragment()
+        if include_optional:
+            return V1DataFragment(
+                text = '',
+                metrics = {
+                    'key' : 1.337
+                    },
+                meta = {
+                    'key' : ''
+                    }
+            )
+        else:
+            return V1DataFragment(
+        )
+        """
+
+    def testV1DataFragment(self):
+        """Test V1DataFragment"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
--- /dev/null
+++ b/eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_deep_compare_leaderboards_request import V1DeepCompareLeaderboardsRequest
+
+class TestV1DeepCompareLeaderboardsRequest(unittest.TestCase):
+    """V1DeepCompareLeaderboardsRequest unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1DeepCompareLeaderboardsRequest:
+        """Test V1DeepCompareLeaderboardsRequest
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1DeepCompareLeaderboardsRequest`
+        """
+        model = V1DeepCompareLeaderboardsRequest()
+        if include_optional:
+            return V1DeepCompareLeaderboardsRequest(
+                leaderboard_baseline_name = '',
+                leaderboard_current_name = '',
+                text_similarity_metric = 'TEXT_SIMILARITY_METRIC_UNSPECIFIED',
+                llm_model_baseline_name = '',
+                llm_model_current_name = ''
+            )
+        else:
+            return V1DeepCompareLeaderboardsRequest(
+        )
+        """
+
+    def testV1DeepCompareLeaderboardsRequest(self):
+        """Test V1DeepCompareLeaderboardsRequest"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
--- /dev/null
+++ b/eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py
@@ -0,0 +1,255 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_deep_compare_leaderboards_response import V1DeepCompareLeaderboardsResponse
+
+class TestV1DeepCompareLeaderboardsResponse(unittest.TestCase):
+    """V1DeepCompareLeaderboardsResponse unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1DeepCompareLeaderboardsResponse:
+        """Test V1DeepCompareLeaderboardsResponse
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1DeepCompareLeaderboardsResponse`
+        """
+        model = V1DeepCompareLeaderboardsResponse()
+        if include_optional:
+            return V1DeepCompareLeaderboardsResponse(
+                leaderboard_cmp_report = eval_studio_client.api.models.v1_leaderboard_cmp_report.v1LeaderboardCmpReport(
+                    summary = '',
+                    comparison_result = eval_studio_client.api.models.complete_comparison_result_structure.Complete comparison result structure(
+                        diffs = [
+                            eval_studio_client.api.models.a_single_diff_item_comparing_two_leaderboards.A single diff item comparing two leaderboards(
+                                diff_key = '',
+                                items = [
+                                    eval_studio_client.api.models.a_single_comparison_item_showing_differences_between_baseline_and_current.A single comparison item showing differences between baseline and current(
+                                        question = '',
+                                        diff_flipped_metrics = [
+                                            eval_studio_client.api.models.flipped_metric_information.Flipped metric information(
+                                                metric_name = '',
+                                                baseline_value = 1.337,
+                                                current_value = 1.337, )
+                                            ],
+                                        baseline_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
+                                            key = '',
+                                            input = '',
+                                            corpus = [
+                                                ''
+                                                ],
+                                            context = [
+                                                ''
+                                                ],
+                                            categories = [
+                                                ''
+                                                ],
+                                            relationships = [
+                                                eval_studio_client.api.models.test_case_relationship_information.Test case relationship information(
+                                                    type = '',
+                                                    target = '',
+                                                    target_type = '', )
+                                                ],
+                                            expected_output = '',
+                                            output_constraints = [
+                                                ''
+                                                ],
+                                            output_condition = '',
+                                            actual_output = '',
+                                            actual_duration = 1.337,
+                                            cost = 1.337,
+                                            model_key = '',
+                                            test_key = '',
+                                            test_case_key = '',
+                                            metrics = [
+                                                eval_studio_client.api.models.metric_information.Metric information(
+                                                    key = '',
+                                                    value = 1.337, )
+                                                ],
+                                            metrics_meta = {
+                                                'key' : ''
+                                                },
+                                            actual_output_meta = [
+                                                eval_studio_client.api.models.actual_output_metadata.Actual output metadata(
+                                                    tokenization = '',
+                                                    data = [
+                                                        eval_studio_client.api.models.data_fragment.Data fragment(
+                                                            text = '',
+                                                            meta = {
+                                                                'key' : ''
+                                                                }, )
+                                                        ], )
+                                                ],
+                                            metric_scores = [
+                                                eval_studio_client.api.models.comparison_metric_score_information_(specific_to_comparison_reports).Comparison metric score information (specific to comparison reports)(
+                                                    metric_name = '',
+                                                    metric_score = 1.337, )
+                                                ],
+                                            result_error_message = '', ),
+                                        baseline_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
+                                            sentences = [
+                                                ''
+                                                ],
+                                            sentences_count = 56,
+                                            common_sentences = [
+                                                ''
+                                                ],
+                                            common_count = 56,
+                                            unique_sentences = [
+                                                ''
+                                                ],
+                                            unique_count = 56,
+                                            identical = True,
+                                            sentence_similarity = {
+                                                'key' : 1.337
+                                                }, ),
+                                        baseline_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
+                                            chunks = [
+                                                ''
+                                                ],
+                                            chunks_count = 56,
+                                            common_chunks = [
+                                                ''
+                                                ],
+                                            common_count = 56,
+                                            unique_chunks = [
+                                                ''
+                                                ],
+                                            unique_count = 56,
+                                            identical = True,
+                                            chunk_similarity = {
+                                                'key' : 1.337
+                                                }, ),
+                                        current_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
+                                            key = '',
+                                            input = '',
+                                            expected_output = '',
+                                            output_condition = '',
+                                            actual_output = '',
+                                            actual_duration = 1.337,
+                                            cost = 1.337,
+                                            model_key = '',
+                                            test_key = '',
+                                            test_case_key = '',
+                                            result_error_message = '', ),
+                                        current_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
+                                            sentences_count = 56,
+                                            common_count = 56,
+                                            unique_count = 56,
+                                            identical = True, ),
+                                        current_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
+                                            chunks_count = 56,
+                                            common_count = 56,
+                                            unique_count = 56,
+                                            identical = True, ), )
+                                    ],
+                                summary = eval_studio_client.api.models.comparison_summary.Comparison summary(
+                                    recommendation_winner = '',
+                                    recommendation = '',
+                                    recommendation_confidence = '', ),
+                                models_overview = eval_studio_client.api.models.models_overview.Models overview(
+                                    baseline_model_key = '',
+                                    current_model_key = '',
+                                    baseline_model_name = '',
+                                    baseline_collection_id = [
+                                        ''
+                                        ],
+                                    current_model_name = '',
+                                    current_collection_id = [
+                                        ''
+                                        ], ),
+                                models_comparisons = eval_studio_client.api.models.models_comparison_statistics.Models comparison statistics(
+                                    test_case_ranks_baseline = 56,
+                                    test_case_ranks_current = 56,
+                                    test_case_wins_baseline = 56,
+                                    test_case_wins_current = 56, ),
+                                models_comparisons_metrics = eval_studio_client.api.models.detailed_metrics_comparisons.Detailed metrics comparisons(
+                                    metrics_ranks_baseline = 1.337,
+                                    metrics_ranks_current = 1.337,
+                                    metrics_wins_baseline = 56,
+                                    metrics_wins_current = 56,
+                                    metrics_averages = [
+                                        eval_studio_client.api.models.metric_average_comparison.Metric average comparison(
+                                            metric_key = '',
+                                            baseline_avg = 1.337,
+                                            current_avg = 1.337,
+                                            diff = 1.337,
+                                            baseline_better_wins = 56,
+                                            current_better_wins = 56,
+                                            baseline_rank_avg = 1.337,
+                                            current_rank_avg = 1.337, )
+                                        ], ),
+                                technical_metrics = eval_studio_client.api.models.technical_metrics_for_model_performance.Technical metrics for model performance(
+                                    baseline = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
+                                        cost_sum = 1.337,
+                                        duration_sum = 1.337,
+                                        duration_min = 1.337,
+                                        duration_max = 1.337,
+                                        duration_avg = 1.337, ),
+                                    current = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
+                                        cost_sum = 1.337,
+                                        duration_sum = 1.337,
+                                        duration_min = 1.337,
+                                        duration_max = 1.337,
+                                        duration_avg = 1.337, ), ),
+                                test_cases_leaderboard = [
+                                    eval_studio_client.api.models.test_case_leaderboard_item.Test case leaderboard item(
+                                        wins = 56,
+                                        question = '',
+                                        changed_metrics_count = 56, )
+                                    ], )
+                            ],
+                        leaderboards = [
+                            eval_studio_client.api.models.leaderboard_information.Leaderboard information(
+                                key = '', )
+                            ],
+                        metrics_meta = {
+                            'key' : eval_studio_client.api.models.metric_metadata.Metric metadata(
+                                key = '',
+                                display_name = '',
+                                data_type = '',
+                                display_value = '',
+                                description = '',
+                                value_range = [
+                                    1.337
+                                    ],
+                                value_enum = [
+                                    ''
+                                    ],
+                                higher_is_better = True,
+                                threshold = 1.337,
+                                is_primary_metric = True,
+                                parent_metric = '',
+                                exclude = True, )
+                            }, ), )
+            )
+        else:
+            return V1DeepCompareLeaderboardsResponse(
+        )
+        """
+
+    def testV1DeepCompareLeaderboardsResponse(self):
+        """Test V1DeepCompareLeaderboardsResponse"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
--- a/eval_studio_client/api/test/test_v1_delete_leaderboard_response.py
+++ b/eval_studio_client/api/test/test_v1_delete_leaderboard_response.py
@@ -98,7 +98,8 @@ class TestV1DeleteLeaderboardResponse(unittest.TestCase):
                     h2ogpte_collection = '',
                     type = 'LEADERBOARD_TYPE_UNSPECIFIED',
                     demo = True,
-                    test_lab = '', )
+                    test_lab = '',
+                    evaluation_type = 'EVALUATION_TYPE_UNSPECIFIED', )
             )
         else:
             return V1DeleteLeaderboardResponse(