eval_studio_client-1.2.5-py3-none-any.whl → eval_studio_client-1.3.0a1-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- eval_studio_client/api/__init__.py +65 -0
- eval_studio_client/api/api/__init__.py +3 -0
- eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
- eval_studio_client/api/api/dashboard_service_api.py +16 -16
- eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/document_service_api.py +16 -16
- eval_studio_client/api/api/evaluation_service_api.py +12 -12
- eval_studio_client/api/api/evaluator_service_api.py +16 -16
- eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
- eval_studio_client/api/api/leaderboard_service_api.py +554 -16
- eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/model_service_api.py +16 -16
- eval_studio_client/api/api/operation_service_api.py +821 -17
- eval_studio_client/api/api/perturbator_service_api.py +22 -22
- eval_studio_client/api/api/test_case_service_api.py +300 -16
- eval_studio_client/api/api/test_class_service_api.py +16 -16
- eval_studio_client/api/api/test_service_api.py +285 -16
- eval_studio_client/api/api/workflow_node_service_api.py +16 -16
- eval_studio_client/api/api/workflow_service_api.py +16 -16
- eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
- eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
- eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
- eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
- eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
- eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
- eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
- eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
- eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/ModelServiceApi.md +5 -5
- eval_studio_client/api/docs/OperationServiceApi.md +215 -8
- eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
- eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
- eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
- eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
- eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
- eval_studio_client/api/docs/TestServiceApi.md +73 -5
- eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
- eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
- eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
- eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
- eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
- eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
- eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
- eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
- eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1DashboardReport.md +31 -0
- eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
- eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1DataFragment.md +31 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
- eval_studio_client/api/docs/V1DiffItem.md +36 -0
- eval_studio_client/api/docs/V1EvaluationType.md +12 -0
- eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
- eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
- eval_studio_client/api/docs/V1HumanDecision.md +12 -0
- eval_studio_client/api/docs/V1Info.md +1 -0
- eval_studio_client/api/docs/V1Leaderboard.md +1 -0
- eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
- eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
- eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
- eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
- eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
- eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1Metric.md +30 -0
- eval_studio_client/api/docs/V1MetricAverage.md +36 -0
- eval_studio_client/api/docs/V1MetricMeta.md +40 -0
- eval_studio_client/api/docs/V1MetricScore.md +1 -1
- eval_studio_client/api/docs/V1MetricScores.md +1 -1
- eval_studio_client/api/docs/V1ModelType.md +1 -1
- eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
- eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
- eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
- eval_studio_client/api/docs/V1Operation.md +1 -0
- eval_studio_client/api/docs/V1OperationView.md +12 -0
- eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
- eval_studio_client/api/docs/V1Stats.md +2 -0
- eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
- eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
- eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
- eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
- eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
- eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
- eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
- eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
- eval_studio_client/api/models/__init__.py +62 -0
- eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
- eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
- eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
- eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
- eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
- eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
- eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
- eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
- eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
- eval_studio_client/api/models/v1_comparison_item.py +130 -0
- eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
- eval_studio_client/api/models/v1_comparison_result.py +120 -0
- eval_studio_client/api/models/v1_comparison_summary.py +91 -0
- eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
- eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
- eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
- eval_studio_client/api/models/v1_dashboard_report.py +109 -0
- eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
- eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_data_fragment.py +91 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
- eval_studio_client/api/models/v1_diff_item.py +137 -0
- eval_studio_client/api/models/v1_evaluation_type.py +39 -0
- eval_studio_client/api/models/v1_flipped_metric.py +91 -0
- eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
- eval_studio_client/api/models/v1_human_decision.py +38 -0
- eval_studio_client/api/models/v1_info.py +4 -2
- eval_studio_client/api/models/v1_leaderboard.py +5 -2
- eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
- eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
- eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
- eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
- eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
- eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_operations_response.py +5 -3
- eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
- eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
- eval_studio_client/api/models/v1_metric.py +89 -0
- eval_studio_client/api/models/v1_metric_average.py +101 -0
- eval_studio_client/api/models/v1_metric_meta.py +109 -0
- eval_studio_client/api/models/v1_metric_score.py +6 -1
- eval_studio_client/api/models/v1_metric_scores.py +1 -1
- eval_studio_client/api/models/v1_model_type.py +2 -1
- eval_studio_client/api/models/v1_models_comparisons.py +93 -0
- eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
- eval_studio_client/api/models/v1_models_overview.py +97 -0
- eval_studio_client/api/models/v1_operation.py +6 -2
- eval_studio_client/api/models/v1_operation_view.py +38 -0
- eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
- eval_studio_client/api/models/v1_stats.py +16 -2
- eval_studio_client/api/models/v1_technical_metrics.py +96 -0
- eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
- eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
- eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
- eval_studio_client/api/models/v1_test_case_result.py +157 -0
- eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
- eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
- eval_studio_client/api/models/v1_workflow_type.py +1 -0
- eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
- eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
- eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
- eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
- eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_operation_service_api.py +18 -0
- eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
- eval_studio_client/api/test/test_test_case_service_api.py +6 -0
- eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
- eval_studio_client/api/test/test_test_service_api.py +6 -0
- eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
- eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
- eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
- eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
- eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
- eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
- eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
- eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
- eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
- eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
- eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
- eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
- eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_diff_item.py +226 -0
- eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
- eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
- eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
- eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
- eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
- eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
- eval_studio_client/api/test/test_v1_human_decision.py +33 -0
- eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
- eval_studio_client/api/test/test_v1_info.py +4 -1
- eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
- eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
- eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
- eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
- eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
- eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
- eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
- eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
- eval_studio_client/api/test/test_v1_metric.py +52 -0
- eval_studio_client/api/test/test_v1_metric_average.py +58 -0
- eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
- eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
- eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
- eval_studio_client/api/test/test_v1_models_overview.py +60 -0
- eval_studio_client/api/test/test_v1_operation.py +2 -1
- eval_studio_client/api/test/test_v1_operation_view.py +33 -0
- eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
- eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
- eval_studio_client/api/test/test_v1_stats.py +3 -1
- eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
- eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
- eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
- eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
- eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
- eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
- eval_studio_client/models.py +18 -6
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/METADATA +2 -2
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/RECORD +306 -111
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/WHEEL +0 -0
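
Most of the churn below is OpenAPI-generated code, but the file list already outlines the 1.3.0a1 surface: dashboard and leaderboard test-case annotation services, leaderboard comparison (deep-compare) reports, and unseen-operation tracking. As a hedged sketch of the new model surface — field names are taken from the generated test stubs later in this diff; which fields are required is not visible here, and the `parent` resource name is a hypothetical placeholder:

```python
# Sketch only: builds one of the new 1.3.0a1 models. Field names come from
# the generated test stubs below; required-ness is not shown in this diff.
from eval_studio_client.api.models.v1_leaderboard_test_case_annotation import (
    V1LeaderboardTestCaseAnnotation,
)

annotation = V1LeaderboardTestCaseAnnotation(
    parent="leaderboards/123",  # hypothetical resource name
    key="reviewed",
    value=True,  # the stubs pass an arbitrary JSON-like value here
)
```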
eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py

```diff
@@ -0,0 +1,254 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_leaderboard_cmp_report import V1LeaderboardCmpReport
+
+class TestV1LeaderboardCmpReport(unittest.TestCase):
+    """V1LeaderboardCmpReport unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1LeaderboardCmpReport:
+        """Test V1LeaderboardCmpReport
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1LeaderboardCmpReport`
+        """
+        model = V1LeaderboardCmpReport()
+        if include_optional:
+            return V1LeaderboardCmpReport(
+                summary = '',
+                comparison_result = eval_studio_client.api.models.complete_comparison_result_structure.Complete comparison result structure(
+                    diffs = [
+                        eval_studio_client.api.models.a_single_diff_item_comparing_two_leaderboards.A single diff item comparing two leaderboards(
+                            diff_key = '',
+                            items = [
+                                eval_studio_client.api.models.a_single_comparison_item_showing_differences_between_baseline_and_current.A single comparison item showing differences between baseline and current(
+                                    question = '',
+                                    diff_flipped_metrics = [
+                                        eval_studio_client.api.models.flipped_metric_information.Flipped metric information(
+                                            metric_name = '',
+                                            baseline_value = 1.337,
+                                            current_value = 1.337, )
+                                    ],
+                                    baseline_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
+                                        key = '',
+                                        input = '',
+                                        corpus = [
+                                            ''
+                                        ],
+                                        context = [
+                                            ''
+                                        ],
+                                        categories = [
+                                            ''
+                                        ],
+                                        relationships = [
+                                            eval_studio_client.api.models.test_case_relationship_information.Test case relationship information(
+                                                type = '',
+                                                target = '',
+                                                target_type = '', )
+                                        ],
+                                        expected_output = '',
+                                        output_constraints = [
+                                            ''
+                                        ],
+                                        output_condition = '',
+                                        actual_output = '',
+                                        actual_duration = 1.337,
+                                        cost = 1.337,
+                                        model_key = '',
+                                        test_key = '',
+                                        test_case_key = '',
+                                        metrics = [
+                                            eval_studio_client.api.models.metric_information.Metric information(
+                                                key = '',
+                                                value = 1.337, )
+                                        ],
+                                        metrics_meta = {
+                                            'key' : ''
+                                        },
+                                        actual_output_meta = [
+                                            eval_studio_client.api.models.actual_output_metadata.Actual output metadata(
+                                                tokenization = '',
+                                                data = [
+                                                    eval_studio_client.api.models.data_fragment.Data fragment(
+                                                        text = '',
+                                                        meta = {
+                                                            'key' : ''
+                                                        }, )
+                                                ], )
+                                        ],
+                                        metric_scores = [
+                                            eval_studio_client.api.models.comparison_metric_score_information_(specific_to_comparison_reports).Comparison metric score information (specific to comparison reports)(
+                                                metric_name = '',
+                                                metric_score = 1.337, )
+                                        ],
+                                        result_error_message = '', ),
+                                    baseline_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
+                                        sentences = [
+                                            ''
+                                        ],
+                                        sentences_count = 56,
+                                        common_sentences = [
+                                            ''
+                                        ],
+                                        common_count = 56,
+                                        unique_sentences = [
+                                            ''
+                                        ],
+                                        unique_count = 56,
+                                        identical = True,
+                                        sentence_similarity = {
+                                            'key' : 1.337
+                                        }, ),
+                                    baseline_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
+                                        chunks = [
+                                            ''
+                                        ],
+                                        chunks_count = 56,
+                                        common_chunks = [
+                                            ''
+                                        ],
+                                        common_count = 56,
+                                        unique_chunks = [
+                                            ''
+                                        ],
+                                        unique_count = 56,
+                                        identical = True,
+                                        chunk_similarity = {
+                                            'key' : 1.337
+                                        }, ),
+                                    current_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
+                                        key = '',
+                                        input = '',
+                                        expected_output = '',
+                                        output_condition = '',
+                                        actual_output = '',
+                                        actual_duration = 1.337,
+                                        cost = 1.337,
+                                        model_key = '',
+                                        test_key = '',
+                                        test_case_key = '',
+                                        result_error_message = '', ),
+                                    current_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
+                                        sentences_count = 56,
+                                        common_count = 56,
+                                        unique_count = 56,
+                                        identical = True, ),
+                                    current_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
+                                        chunks_count = 56,
+                                        common_count = 56,
+                                        unique_count = 56,
+                                        identical = True, ), )
+                            ],
+                            summary = eval_studio_client.api.models.comparison_summary.Comparison summary(
+                                recommendation_winner = '',
+                                recommendation = '',
+                                recommendation_confidence = '', ),
+                            models_overview = eval_studio_client.api.models.models_overview.Models overview(
+                                baseline_model_key = '',
+                                current_model_key = '',
+                                baseline_model_name = '',
+                                baseline_collection_id = [
+                                    ''
+                                ],
+                                current_model_name = '',
+                                current_collection_id = [
+                                    ''
+                                ], ),
+                            models_comparisons = eval_studio_client.api.models.models_comparison_statistics.Models comparison statistics(
+                                test_case_ranks_baseline = 56,
+                                test_case_ranks_current = 56,
+                                test_case_wins_baseline = 56,
+                                test_case_wins_current = 56, ),
+                            models_comparisons_metrics = eval_studio_client.api.models.detailed_metrics_comparisons.Detailed metrics comparisons(
+                                metrics_ranks_baseline = 1.337,
+                                metrics_ranks_current = 1.337,
+                                metrics_wins_baseline = 56,
+                                metrics_wins_current = 56,
+                                metrics_averages = [
+                                    eval_studio_client.api.models.metric_average_comparison.Metric average comparison(
+                                        metric_key = '',
+                                        baseline_avg = 1.337,
+                                        current_avg = 1.337,
+                                        diff = 1.337,
+                                        baseline_better_wins = 56,
+                                        current_better_wins = 56,
+                                        baseline_rank_avg = 1.337,
+                                        current_rank_avg = 1.337, )
+                                ], ),
+                            technical_metrics = eval_studio_client.api.models.technical_metrics_for_model_performance.Technical metrics for model performance(
+                                baseline = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
+                                    cost_sum = 1.337,
+                                    duration_sum = 1.337,
+                                    duration_min = 1.337,
+                                    duration_max = 1.337,
+                                    duration_avg = 1.337, ),
+                                current = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
+                                    cost_sum = 1.337,
+                                    duration_sum = 1.337,
+                                    duration_min = 1.337,
+                                    duration_max = 1.337,
+                                    duration_avg = 1.337, ), ),
+                            test_cases_leaderboard = [
+                                eval_studio_client.api.models.test_case_leaderboard_item.Test case leaderboard item(
+                                    wins = 56,
+                                    question = '',
+                                    changed_metrics_count = 56, )
+                            ], )
+                    ],
+                    leaderboards = [
+                        eval_studio_client.api.models.leaderboard_information.Leaderboard information(
+                            key = '', )
+                    ],
+                    metrics_meta = {
+                        'key' : eval_studio_client.api.models.metric_metadata.Metric metadata(
+                            key = '',
+                            display_name = '',
+                            data_type = '',
+                            display_value = '',
+                            description = '',
+                            value_range = [
+                                1.337
+                            ],
+                            value_enum = [
+                                ''
+                            ],
+                            higher_is_better = True,
+                            threshold = 1.337,
+                            is_primary_metric = True,
+                            parent_metric = '',
+                            exclude = True, )
+                    }, )
+            )
+        else:
+            return V1LeaderboardCmpReport(
+        )
+        """
+
+    def testV1LeaderboardCmpReport(self):
+        """Test V1LeaderboardCmpReport"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
```
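
The stub above exercises the full shape of the new comparison report: a V1LeaderboardCmpReport carries a summary plus a V1ComparisonResult, whose diffs pair a baseline and a current V1TestCaseResult per question, alongside leaderboard info and metric metadata. A minimal sketch using the documented Pydantic classes — field names mirror the stub, all values are placeholders, and no field here is confirmed required:

```python
# Minimal sketch of the comparison-report shape exercised by the stub above.
# Field names come from the generated sample; values are placeholders.
from eval_studio_client.api.models.v1_comparison_item import V1ComparisonItem
from eval_studio_client.api.models.v1_comparison_result import V1ComparisonResult
from eval_studio_client.api.models.v1_diff_item import V1DiffItem
from eval_studio_client.api.models.v1_leaderboard_cmp_report import V1LeaderboardCmpReport

report = V1LeaderboardCmpReport(
    summary="baseline vs. current run",
    comparison_result=V1ComparisonResult(
        diffs=[
            V1DiffItem(
                diff_key="question-1",  # placeholder key
                items=[V1ComparisonItem(question="What is Eval Studio?")],
            )
        ]
    ),
)
```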
eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py

```diff
@@ -0,0 +1,53 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_leaderboard_comparison_item import V1LeaderboardComparisonItem
+
+class TestV1LeaderboardComparisonItem(unittest.TestCase):
+    """V1LeaderboardComparisonItem unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1LeaderboardComparisonItem:
+        """Test V1LeaderboardComparisonItem
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1LeaderboardComparisonItem`
+        """
+        model = V1LeaderboardComparisonItem()
+        if include_optional:
+            return V1LeaderboardComparisonItem(
+                wins = 56,
+                question = '',
+                changed_metrics_count = 56
+            )
+        else:
+            return V1LeaderboardComparisonItem(
+        )
+        """
+
+    def testV1LeaderboardComparisonItem(self):
+        """Test V1LeaderboardComparisonItem"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
```
eval_studio_client/api/test/test_v1_leaderboard_info.py

```diff
@@ -0,0 +1,57 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_leaderboard_info import V1LeaderboardInfo
+
+class TestV1LeaderboardInfo(unittest.TestCase):
+    """V1LeaderboardInfo unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1LeaderboardInfo:
+        """Test V1LeaderboardInfo
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1LeaderboardInfo`
+        """
+        model = V1LeaderboardInfo()
+        if include_optional:
+            return V1LeaderboardInfo(
+                key = '',
+                items = [
+                    eval_studio_client.api.models.leaderboard_comparison_item.Leaderboard comparison item(
+                        wins = 56,
+                        question = '',
+                        changed_metrics_count = 56, )
+                ]
+            )
+        else:
+            return V1LeaderboardInfo(
+        )
+        """
+
+    def testV1LeaderboardInfo(self):
+        """Test V1LeaderboardInfo"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
```
eval_studio_client/api/test/test_v1_leaderboard_report.py

```diff
@@ -76,8 +76,45 @@ class TestV1LeaderboardReport(unittest.TestCase):
                                 data = [
                                     eval_studio_client.api.models.v1_leaderboard_report_actual_output_data.v1LeaderboardReportActualOutputData(
                                         text = '', )
-                                ],
-
+                                ],
+                                agent_chat_activity_diagram = eval_studio_client.api.models.v1_agent_chat_activity_diagram.v1AgentChatActivityDiagram(
+                                    rows = [
+                                        eval_studio_client.api.models.v1_agent_chat_activity_diagram_row.v1AgentChatActivityDiagramRow(
+                                            nodes = [
+                                                eval_studio_client.api.models.v1_agent_chat_activity_diagram_node.v1AgentChatActivityDiagramNode(
+                                                    id = '',
+                                                    role = '',
+                                                    label = '', )
+                                            ], )
+                                    ],
+                                    edges = [
+                                        eval_studio_client.api.models.v1_agent_chat_activity_diagram_edge.v1AgentChatActivityDiagramEdge(
+                                            from = '',
+                                            to = '',
+                                            label = '', )
+                                    ], ),
+                                agent_chat_tools_bar_chart = eval_studio_client.api.models.v1_agent_chat_tools_bar_chart.v1AgentChatToolsBarChart(
+                                    tools = {
+                                        'key' : eval_studio_client.api.models.v1_agent_chat_tool_usage.v1AgentChatToolUsage(
+                                            name = '',
+                                            success_count = 56,
+                                            failure_count = 56,
+                                            total_count = 56, )
+                                    }, ),
+                                agent_chat_scripts_bar_chart = eval_studio_client.api.models.v1_agent_chat_scripts_bar_chart.v1AgentChatScriptsBarChart(
+                                    scripts = {
+                                        'key' : eval_studio_client.api.models.v1_agent_chat_script_usage.v1AgentChatScriptUsage(
+                                            name = '',
+                                            success_count = 56,
+                                            failure_count = 56,
+                                            total_count = 56, )
+                                    }, ), )
+                            ],
+                            human_decision = 'HUMAN_DECISION_UNSPECIFIED',
+                            comment = '',
+                            annotations = {
+                                'key' : None
+                            }, )
                     ],
                     models = [
                         eval_studio_client.api.models.v1_leaderboard_report_model.v1LeaderboardReportModel(
```
eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py

```diff
@@ -40,7 +40,39 @@ class TestV1LeaderboardReportActualOutputMeta(unittest.TestCase):
                     eval_studio_client.api.models.v1_leaderboard_report_actual_output_data.v1LeaderboardReportActualOutputData(
                         text = '',
                         metrics = eval_studio_client.api.models.metrics.metrics(), )
-                ]
+                ],
+                agent_chat_activity_diagram = eval_studio_client.api.models.v1_agent_chat_activity_diagram.v1AgentChatActivityDiagram(
+                    rows = [
+                        eval_studio_client.api.models.v1_agent_chat_activity_diagram_row.v1AgentChatActivityDiagramRow(
+                            nodes = [
+                                eval_studio_client.api.models.v1_agent_chat_activity_diagram_node.v1AgentChatActivityDiagramNode(
+                                    id = '',
+                                    role = '',
+                                    label = '', )
+                            ], )
+                    ],
+                    edges = [
+                        eval_studio_client.api.models.v1_agent_chat_activity_diagram_edge.v1AgentChatActivityDiagramEdge(
+                            from = '',
+                            to = '',
+                            label = '', )
+                    ], ),
+                agent_chat_tools_bar_chart = eval_studio_client.api.models.v1_agent_chat_tools_bar_chart.v1AgentChatToolsBarChart(
+                    tools = {
+                        'key' : eval_studio_client.api.models.v1_agent_chat_tool_usage.v1AgentChatToolUsage(
+                            name = '',
+                            success_count = 56,
+                            failure_count = 56,
+                            total_count = 56, )
+                    }, ),
+                agent_chat_scripts_bar_chart = eval_studio_client.api.models.v1_agent_chat_scripts_bar_chart.v1AgentChatScriptsBarChart(
+                    scripts = {
+                        'key' : eval_studio_client.api.models.v1_agent_chat_script_usage.v1AgentChatScriptUsage(
+                            name = '',
+                            success_count = 56,
+                            failure_count = 56,
+                            total_count = 56, )
+                    }, )
             )
         else:
             return V1LeaderboardReportActualOutputMeta(
```
eval_studio_client/api/test/test_v1_leaderboard_report_result.py

```diff
@@ -75,8 +75,45 @@ class TestV1LeaderboardReportResult(unittest.TestCase):
                         eval_studio_client.api.models.v1_leaderboard_report_actual_output_data.v1LeaderboardReportActualOutputData(
                             text = '',
                             metrics = eval_studio_client.api.models.metrics.metrics(), )
-                ],
-
+                ],
+                        agent_chat_activity_diagram = eval_studio_client.api.models.v1_agent_chat_activity_diagram.v1AgentChatActivityDiagram(
+                            rows = [
+                                eval_studio_client.api.models.v1_agent_chat_activity_diagram_row.v1AgentChatActivityDiagramRow(
+                                    nodes = [
+                                        eval_studio_client.api.models.v1_agent_chat_activity_diagram_node.v1AgentChatActivityDiagramNode(
+                                            id = '',
+                                            role = '',
+                                            label = '', )
+                                    ], )
+                            ],
+                            edges = [
+                                eval_studio_client.api.models.v1_agent_chat_activity_diagram_edge.v1AgentChatActivityDiagramEdge(
+                                    from = '',
+                                    to = '',
+                                    label = '', )
+                            ], ),
+                        agent_chat_tools_bar_chart = eval_studio_client.api.models.v1_agent_chat_tools_bar_chart.v1AgentChatToolsBarChart(
+                            tools = {
+                                'key' : eval_studio_client.api.models.v1_agent_chat_tool_usage.v1AgentChatToolUsage(
+                                    name = '',
+                                    success_count = 56,
+                                    failure_count = 56,
+                                    total_count = 56, )
+                            }, ),
+                        agent_chat_scripts_bar_chart = eval_studio_client.api.models.v1_agent_chat_scripts_bar_chart.v1AgentChatScriptsBarChart(
+                            scripts = {
+                                'key' : eval_studio_client.api.models.v1_agent_chat_script_usage.v1AgentChatScriptUsage(
+                                    name = '',
+                                    success_count = 56,
+                                    failure_count = 56,
+                                    total_count = 56, )
+                            }, ), )
+                ],
+                human_decision = 'HUMAN_DECISION_UNSPECIFIED',
+                comment = '',
+                annotations = {
+                    'key' : None
+                }
             )
         else:
             return V1LeaderboardReportResult(
```
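
The three hunks above attach the same additions to existing report models: an agent-chat activity diagram plus tool/script usage bar charts on the output metadata, and human review fields (human_decision, comment, annotations) on the result itself. A hedged sketch of the review fields — 'HUMAN_DECISION_UNSPECIFIED' is the only V1HumanDecision value visible in this diff, and none of these fields is confirmed required:

```python
# Sketch: the new human-review fields on a leaderboard report result.
# 'HUMAN_DECISION_UNSPECIFIED' is the only enum value shown in this diff;
# per the stub, the annotations map holds arbitrary JSON-like values.
from eval_studio_client.api.models.v1_leaderboard_report_result import (
    V1LeaderboardReportResult,
)

result = V1LeaderboardReportResult(
    human_decision="HUMAN_DECISION_UNSPECIFIED",
    comment="flagged for a second review",  # placeholder text
    annotations={"reviewer": "alice"},  # hypothetical key/value
)
```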
eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py

```diff
@@ -0,0 +1,33 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_leaderboard_report_result_view import V1LeaderboardReportResultView
+
+class TestV1LeaderboardReportResultView(unittest.TestCase):
+    """V1LeaderboardReportResultView unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def testV1LeaderboardReportResultView(self):
+        """Test V1LeaderboardReportResultView"""
+        # inst = V1LeaderboardReportResultView()
+
+if __name__ == '__main__':
+    unittest.main()
```
eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py

```diff
@@ -0,0 +1,58 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_leaderboard_test_case_annotation import V1LeaderboardTestCaseAnnotation
+
+class TestV1LeaderboardTestCaseAnnotation(unittest.TestCase):
+    """V1LeaderboardTestCaseAnnotation unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1LeaderboardTestCaseAnnotation:
+        """Test V1LeaderboardTestCaseAnnotation
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1LeaderboardTestCaseAnnotation`
+        """
+        model = V1LeaderboardTestCaseAnnotation()
+        if include_optional:
+            return V1LeaderboardTestCaseAnnotation(
+                name = '',
+                create_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                creator = '',
+                update_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                updater = '',
+                parent = '',
+                key = '',
+                value = None
+            )
+        else:
+            return V1LeaderboardTestCaseAnnotation(
+        )
+        """
+
+    def testV1LeaderboardTestCaseAnnotation(self):
+        """Test V1LeaderboardTestCaseAnnotation"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
```
eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py

```diff
@@ -0,0 +1,61 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+import unittest
+
+from eval_studio_client.api.models.v1_list_dashboard_test_case_annotations_response import V1ListDashboardTestCaseAnnotationsResponse
+
+class TestV1ListDashboardTestCaseAnnotationsResponse(unittest.TestCase):
+    """V1ListDashboardTestCaseAnnotationsResponse unit test stubs"""
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def make_instance(self, include_optional) -> V1ListDashboardTestCaseAnnotationsResponse:
+        """Test V1ListDashboardTestCaseAnnotationsResponse
+            include_option is a boolean, when False only required
+            params are included, when True both required and
+            optional params are included """
+        # uncomment below to create an instance of `V1ListDashboardTestCaseAnnotationsResponse`
+        """
+        model = V1ListDashboardTestCaseAnnotationsResponse()
+        if include_optional:
+            return V1ListDashboardTestCaseAnnotationsResponse(
+                dashboard_test_case_annotations = [
+                    eval_studio_client.api.models.v1_dashboard_test_case_annotation.v1DashboardTestCaseAnnotation(
+                        name = '',
+                        create_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                        creator = '',
+                        update_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                        updater = '',
+                        parent = '',
+                        key = '',
+                        value = eval_studio_client.api.models.value.value(), )
+                ]
+            )
+        else:
+            return V1ListDashboardTestCaseAnnotationsResponse(
+        )
+        """
+
+    def testV1ListDashboardTestCaseAnnotationsResponse(self):
+        """Test V1ListDashboardTestCaseAnnotationsResponse"""
+        # inst_req_only = self.make_instance(include_optional=False)
+        # inst_req_and_optional = self.make_instance(include_optional=True)
+
+if __name__ == '__main__':
+    unittest.main()
```
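
Like the other List* responses in this client, the new annotation responses wrap a single repeated field. A hedged consumption sketch — obtaining the response from the service is elided, since RPC method names are not shown in this excerpt:

```python
# Sketch: reading the new list response. The field name
# dashboard_test_case_annotations comes from the stub above; the generated
# Pydantic field may be None when the server returns an empty list.
from typing import List

from eval_studio_client.api.models.v1_list_dashboard_test_case_annotations_response import (
    V1ListDashboardTestCaseAnnotationsResponse,
)

def annotation_keys(resp: V1ListDashboardTestCaseAnnotationsResponse) -> List[str]:
    return [a.key for a in resp.dashboard_test_case_annotations or []]
```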