eval-studio-client 1.2.5__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_studio_client/api/__init__.py +65 -0
- eval_studio_client/api/api/__init__.py +3 -0
- eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
- eval_studio_client/api/api/dashboard_service_api.py +16 -16
- eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/document_service_api.py +16 -16
- eval_studio_client/api/api/evaluation_service_api.py +12 -12
- eval_studio_client/api/api/evaluator_service_api.py +16 -16
- eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
- eval_studio_client/api/api/leaderboard_service_api.py +554 -16
- eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/model_service_api.py +16 -16
- eval_studio_client/api/api/operation_service_api.py +821 -17
- eval_studio_client/api/api/perturbator_service_api.py +22 -22
- eval_studio_client/api/api/test_case_service_api.py +300 -16
- eval_studio_client/api/api/test_class_service_api.py +16 -16
- eval_studio_client/api/api/test_service_api.py +285 -16
- eval_studio_client/api/api/workflow_node_service_api.py +16 -16
- eval_studio_client/api/api/workflow_service_api.py +16 -16
- eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
- eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
- eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
- eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
- eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
- eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
- eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
- eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
- eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/ModelServiceApi.md +5 -5
- eval_studio_client/api/docs/OperationServiceApi.md +215 -8
- eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
- eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
- eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
- eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
- eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
- eval_studio_client/api/docs/TestServiceApi.md +73 -5
- eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
- eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
- eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
- eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
- eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
- eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
- eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
- eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
- eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1DashboardReport.md +31 -0
- eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
- eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1DataFragment.md +31 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
- eval_studio_client/api/docs/V1DiffItem.md +36 -0
- eval_studio_client/api/docs/V1EvaluationType.md +12 -0
- eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
- eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
- eval_studio_client/api/docs/V1HumanDecision.md +12 -0
- eval_studio_client/api/docs/V1Info.md +1 -0
- eval_studio_client/api/docs/V1Leaderboard.md +1 -0
- eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
- eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
- eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
- eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
- eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
- eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1Metric.md +30 -0
- eval_studio_client/api/docs/V1MetricAverage.md +36 -0
- eval_studio_client/api/docs/V1MetricMeta.md +40 -0
- eval_studio_client/api/docs/V1MetricScore.md +1 -1
- eval_studio_client/api/docs/V1MetricScores.md +1 -1
- eval_studio_client/api/docs/V1ModelType.md +1 -1
- eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
- eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
- eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
- eval_studio_client/api/docs/V1Operation.md +1 -0
- eval_studio_client/api/docs/V1OperationView.md +12 -0
- eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
- eval_studio_client/api/docs/V1Stats.md +2 -0
- eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
- eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
- eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
- eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
- eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
- eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
- eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
- eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
- eval_studio_client/api/models/__init__.py +62 -0
- eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
- eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
- eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
- eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
- eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
- eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
- eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
- eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
- eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
- eval_studio_client/api/models/v1_comparison_item.py +130 -0
- eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
- eval_studio_client/api/models/v1_comparison_result.py +120 -0
- eval_studio_client/api/models/v1_comparison_summary.py +91 -0
- eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
- eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
- eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
- eval_studio_client/api/models/v1_dashboard_report.py +109 -0
- eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
- eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_data_fragment.py +91 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
- eval_studio_client/api/models/v1_diff_item.py +137 -0
- eval_studio_client/api/models/v1_evaluation_type.py +39 -0
- eval_studio_client/api/models/v1_flipped_metric.py +91 -0
- eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
- eval_studio_client/api/models/v1_human_decision.py +38 -0
- eval_studio_client/api/models/v1_info.py +4 -2
- eval_studio_client/api/models/v1_leaderboard.py +5 -2
- eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
- eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
- eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
- eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
- eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
- eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_operations_response.py +5 -3
- eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
- eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
- eval_studio_client/api/models/v1_metric.py +89 -0
- eval_studio_client/api/models/v1_metric_average.py +101 -0
- eval_studio_client/api/models/v1_metric_meta.py +109 -0
- eval_studio_client/api/models/v1_metric_score.py +6 -1
- eval_studio_client/api/models/v1_metric_scores.py +1 -1
- eval_studio_client/api/models/v1_model_type.py +2 -1
- eval_studio_client/api/models/v1_models_comparisons.py +93 -0
- eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
- eval_studio_client/api/models/v1_models_overview.py +97 -0
- eval_studio_client/api/models/v1_operation.py +6 -2
- eval_studio_client/api/models/v1_operation_view.py +38 -0
- eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
- eval_studio_client/api/models/v1_stats.py +16 -2
- eval_studio_client/api/models/v1_technical_metrics.py +96 -0
- eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
- eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
- eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
- eval_studio_client/api/models/v1_test_case_result.py +157 -0
- eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
- eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
- eval_studio_client/api/models/v1_workflow_type.py +1 -0
- eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
- eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
- eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
- eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
- eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_operation_service_api.py +18 -0
- eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
- eval_studio_client/api/test/test_test_case_service_api.py +6 -0
- eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
- eval_studio_client/api/test/test_test_service_api.py +6 -0
- eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
- eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
- eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
- eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
- eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
- eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
- eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
- eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
- eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
- eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
- eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
- eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
- eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_diff_item.py +226 -0
- eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
- eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
- eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
- eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
- eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
- eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
- eval_studio_client/api/test/test_v1_human_decision.py +33 -0
- eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
- eval_studio_client/api/test/test_v1_info.py +4 -1
- eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
- eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
- eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
- eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
- eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
- eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
- eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
- eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
- eval_studio_client/api/test/test_v1_metric.py +52 -0
- eval_studio_client/api/test/test_v1_metric_average.py +58 -0
- eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
- eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
- eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
- eval_studio_client/api/test/test_v1_models_overview.py +60 -0
- eval_studio_client/api/test/test_v1_operation.py +2 -1
- eval_studio_client/api/test/test_v1_operation_view.py +33 -0
- eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
- eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
- eval_studio_client/api/test/test_v1_stats.py +3 -1
- eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
- eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
- eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
- eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
- eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
- eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
- eval_studio_client/models.py +18 -6
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0.dist-info}/METADATA +2 -2
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0.dist-info}/RECORD +306 -111
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0.dist-info}/WHEEL +0 -0
eval_studio_client/api/models/v1_get_dashboard_report_response.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_dashboard_report import V1DashboardReport
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1GetDashboardReportResponse(BaseModel):
+    """
+    V1GetDashboardReportResponse
+    """ # noqa: E501
+    dashboard_report: Optional[V1DashboardReport] = Field(default=None, alias="dashboardReport")
+    __properties: ClassVar[List[str]] = ["dashboardReport"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1GetDashboardReportResponse from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of dashboard_report
+        if self.dashboard_report:
+            _dict['dashboardReport'] = self.dashboard_report.to_dict()
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1GetDashboardReportResponse from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "dashboardReport": V1DashboardReport.from_dict(obj["dashboardReport"]) if obj.get("dashboardReport") is not None else None
+        }, strict=False)
+        return _obj
+
+
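For orientation, a minimal sketch of round-tripping the new response model, assuming the 1.3.0 wheel is installed and importable as eval_studio_client; it only uses the from_json/to_dict helpers shown in the diff above, and the empty payload is made up for illustration.

from eval_studio_client.api.models.v1_get_dashboard_report_response import V1GetDashboardReportResponse

# Hypothetical payload: a dashboard report response with no report attached yet.
resp = V1GetDashboardReportResponse.from_json('{"dashboardReport": null}')
print(resp.dashboard_report)  # None
print(resp.to_dict())         # {} -- None fields are dropped by exclude_none=True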
eval_studio_client/api/models/v1_human_decision.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import json
+from enum import Enum
+from typing_extensions import Self
+
+
+class V1HumanDecision(str, Enum):
+    """
+    HumanDecision represents the human decision on the result. - HUMAN_DECISION_UNSPECIFIED: Default value, means no decision has been made. - HUMAN_DECISION_APPROVED: The result was approved by human. - HUMAN_DECISION_REJECTED: The result was rejected by human.
+    """
+
+    """
+    allowed enum values
+    """
+    HUMAN_DECISION_UNSPECIFIED = 'HUMAN_DECISION_UNSPECIFIED'
+    HUMAN_DECISION_APPROVED = 'HUMAN_DECISION_APPROVED'
+    HUMAN_DECISION_REJECTED = 'HUMAN_DECISION_REJECTED'
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Self:
+        """Create an instance of V1HumanDecision from a JSON string"""
+        return cls(json.loads(json_str))
+
+
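A short sketch of how the new enum deserializes; any value other than the three members above raises a ValueError.

from eval_studio_client.api.models.v1_human_decision import V1HumanDecision

decision = V1HumanDecision.from_json('"HUMAN_DECISION_APPROVED"')
assert decision is V1HumanDecision.HUMAN_DECISION_APPROVED
assert decision.value == 'HUMAN_DECISION_APPROVED'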
eval_studio_client/api/models/v1_info.py
@@ -38,7 +38,8 @@ class V1Info(BaseModel):
     public_instance: Optional[StrictBool] = Field(default=None, description="If the Eval Studio instance is public.", alias="publicInstance")
     sharing_enabled: Optional[StrictBool] = Field(default=None, description="Whether the sharing capability is enabled.", alias="sharingEnabled")
     experimental_features_enabled: Optional[StrictBool] = Field(default=None, description="Whether the experimental features are enabled.", alias="experimentalFeaturesEnabled")
-    __properties: ClassVar[List[str]] = ["baseUrl", "version", "oauth2LoginUrl", "oauth2LogoutUrl", "h2oGpteAllowlist", "h2oGpteClientVersion", "h2oSonarVersion", "preferredLlmsForTestGeneration", "h2oCloudUrl", "publicInstance", "sharingEnabled", "experimentalFeaturesEnabled"]
+    model_type_allowlist: Optional[List[StrictStr]] = Field(default=None, description="Allowlist of model types for UI that can be hosted in Eval Studio. E.g. MODEL_TYPE_H2OGPTE_RAG, MODEL_TYPE_OPENAI_CHAT, MODEL_TYPE_AMAZON_BEDROCK. Use \"*\" to allow all model types.", alias="modelTypeAllowlist")
+    __properties: ClassVar[List[str]] = ["baseUrl", "version", "oauth2LoginUrl", "oauth2LogoutUrl", "h2oGpteAllowlist", "h2oGpteClientVersion", "h2oSonarVersion", "preferredLlmsForTestGeneration", "h2oCloudUrl", "publicInstance", "sharingEnabled", "experimentalFeaturesEnabled", "modelTypeAllowlist"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -102,7 +103,8 @@ class V1Info(BaseModel):
             "h2oCloudUrl": obj.get("h2oCloudUrl"),
             "publicInstance": obj.get("publicInstance"),
             "sharingEnabled": obj.get("sharingEnabled"),
-            "experimentalFeaturesEnabled": obj.get("experimentalFeaturesEnabled")
+            "experimentalFeaturesEnabled": obj.get("experimentalFeaturesEnabled"),
+            "modelTypeAllowlist": obj.get("modelTypeAllowlist")
         }, strict=False)
         return _obj
 
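A minimal sketch of reading the new allowlist field on the client side; the payload values are made up, but they follow the examples given in the field description above.

from eval_studio_client.api.models.v1_info import V1Info

# Hypothetical server payload exposing the new allowlist; "*" would allow all model types.
info = V1Info.from_dict({"modelTypeAllowlist": ["MODEL_TYPE_H2OGPTE_RAG", "MODEL_TYPE_OPENAI_CHAT"]})
print(info.model_type_allowlist)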
eval_studio_client/api/models/v1_leaderboard.py
@@ -20,6 +20,7 @@ import json
 from datetime import datetime
 from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_evaluation_type import V1EvaluationType
 from eval_studio_client.api.models.v1_insight import V1Insight
 from eval_studio_client.api.models.v1_leaderboard_status import V1LeaderboardStatus
 from eval_studio_client.api.models.v1_leaderboard_type import V1LeaderboardType
@@ -57,7 +58,8 @@ class V1Leaderboard(BaseModel):
     type: Optional[V1LeaderboardType] = None
     demo: Optional[StrictBool] = Field(default=None, description="Output only. Whether the Leaderboard is a demo resource or not. Demo resources are read only.")
     test_lab: Optional[StrictStr] = Field(default=None, description="Optional. Resource name of the TestLab if Leaderboard was created from a imported TestLab.", alias="testLab")
-    __properties: ClassVar[List[str]] = ["name", "createTime", "creator", "updateTime", "updater", "deleteTime", "deleter", "displayName", "description", "status", "evaluator", "tests", "model", "createOperation", "leaderboardReport", "leaderboardTable", "leaderboardSummary", "llmModels", "leaderboardProblems", "evaluatorParameters", "insights", "modelParameters", "h2ogpteCollection", "type", "demo", "testLab"]
+    evaluation_type: Optional[V1EvaluationType] = Field(default=None, alias="evaluationType")
+    __properties: ClassVar[List[str]] = ["name", "createTime", "creator", "updateTime", "updater", "deleteTime", "deleter", "displayName", "description", "status", "evaluator", "tests", "model", "createOperation", "leaderboardReport", "leaderboardTable", "leaderboardSummary", "llmModels", "leaderboardProblems", "evaluatorParameters", "insights", "modelParameters", "h2ogpteCollection", "type", "demo", "testLab", "evaluationType"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -175,7 +177,8 @@ class V1Leaderboard(BaseModel):
             "h2ogpteCollection": obj.get("h2ogpteCollection"),
             "type": obj.get("type"),
             "demo": obj.get("demo"),
-            "testLab": obj.get("testLab")
+            "testLab": obj.get("testLab"),
+            "evaluationType": obj.get("evaluationType")
         }, strict=False)
         return _obj
 
eval_studio_client/api/models/v1_leaderboard_cmp_report.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_comparison_result import V1ComparisonResult
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1LeaderboardCmpReport(BaseModel):
+    """
+    V1LeaderboardCmpReport
+    """ # noqa: E501
+    summary: Optional[StrictStr] = Field(default=None, description="Summary of the comparison.")
+    comparison_result: Optional[V1ComparisonResult] = Field(default=None, alias="comparisonResult")
+    __properties: ClassVar[List[str]] = ["summary", "comparisonResult"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1LeaderboardCmpReport from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of comparison_result
+        if self.comparison_result:
+            _dict['comparisonResult'] = self.comparison_result.to_dict()
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1LeaderboardCmpReport from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "summary": obj.get("summary"),
+            "comparisonResult": V1ComparisonResult.from_dict(obj["comparisonResult"]) if obj.get("comparisonResult") is not None else None
+        }, strict=False)
+        return _obj
+
+
eval_studio_client/api/models/v1_leaderboard_comparison_item.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1LeaderboardComparisonItem(BaseModel):
+    """
+    V1LeaderboardComparisonItem
+    """ # noqa: E501
+    wins: Optional[StrictInt] = Field(default=None, description="Number of wins.")
+    question: Optional[StrictStr] = Field(default=None, description="Question text.")
+    changed_metrics_count: Optional[StrictInt] = Field(default=None, description="Count of changed metrics.", alias="changedMetricsCount")
+    __properties: ClassVar[List[str]] = ["wins", "question", "changedMetricsCount"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1LeaderboardComparisonItem from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1LeaderboardComparisonItem from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "wins": obj.get("wins"),
+            "question": obj.get("question"),
+            "changedMetricsCount": obj.get("changedMetricsCount")
+        }, strict=False)
+        return _obj
+
+
eval_studio_client/api/models/v1_leaderboard_info.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_leaderboard_comparison_item import V1LeaderboardComparisonItem
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1LeaderboardInfo(BaseModel):
+    """
+    V1LeaderboardInfo
+    """ # noqa: E501
+    key: Optional[StrictStr] = Field(default=None, description="Unique key identifying the leaderboard pair (format: \"baseline_id|current_id\").")
+    items: Optional[List[V1LeaderboardComparisonItem]] = Field(default=None, description="List of leaderboard comparison items.")
+    __properties: ClassVar[List[str]] = ["key", "items"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1LeaderboardInfo from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of each item in items (list)
+        _items = []
+        if self.items:
+            for _item_items in self.items:
+                if _item_items:
+                    _items.append(_item_items.to_dict())
+            _dict['items'] = _items
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1LeaderboardInfo from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "key": obj.get("key"),
+            "items": [V1LeaderboardComparisonItem.from_dict(_item) for _item in obj["items"]] if obj.get("items") is not None else None
+        }, strict=False)
+        return _obj
+
+
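To see how the nested comparison items round-trip, a small sketch with made-up sample data; it relies only on the from_dict/to_dict helpers shown above.

from eval_studio_client.api.models.v1_leaderboard_info import V1LeaderboardInfo

# Hypothetical comparison payload for a baseline/current leaderboard pair.
payload = {
    "key": "baseline-leaderboard|current-leaderboard",
    "items": [{"wins": 2, "question": "Which model answers faster?", "changedMetricsCount": 1}],
}
info = V1LeaderboardInfo.from_dict(payload)
# Nested dicts are materialized as V1LeaderboardComparisonItem instances and
# serialized back through their own to_dict() when dumping.
assert info.items[0].changed_metrics_count == 1
print(info.to_dict())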
eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py
@@ -19,17 +19,23 @@ import json
 
 from pydantic import BaseModel, ConfigDict, Field, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_agent_chat_activity_diagram import V1AgentChatActivityDiagram
+from eval_studio_client.api.models.v1_agent_chat_scripts_bar_chart import V1AgentChatScriptsBarChart
+from eval_studio_client.api.models.v1_agent_chat_tools_bar_chart import V1AgentChatToolsBarChart
 from eval_studio_client.api.models.v1_leaderboard_report_actual_output_data import V1LeaderboardReportActualOutputData
 from typing import Optional, Set
 from typing_extensions import Self
 
 class V1LeaderboardReportActualOutputMeta(BaseModel):
     """
-    ActualOutputMeta represents the metadata about the actual output.
+    ActualOutputMeta represents the metadata about the actual output. Each instance can contain any combination of the fields below.
     """ # noqa: E501
-    tokenization: Optional[StrictStr] = Field(default=None, description="
-    data: Optional[List[V1LeaderboardReportActualOutputData]] = Field(default=None, description="
-    __properties: ClassVar[List[str]] = ["tokenization", "data"]
+    tokenization: Optional[StrictStr] = Field(default=None, description="Optional. Actual output data tokenization like sentence_level_punkt.")
+    data: Optional[List[V1LeaderboardReportActualOutputData]] = Field(default=None, description="Optional. Actual output data - list of text fragments coupled with the metric values.")
+    agent_chat_activity_diagram: Optional[V1AgentChatActivityDiagram] = Field(default=None, alias="agentChatActivityDiagram")
+    agent_chat_tools_bar_chart: Optional[V1AgentChatToolsBarChart] = Field(default=None, alias="agentChatToolsBarChart")
+    agent_chat_scripts_bar_chart: Optional[V1AgentChatScriptsBarChart] = Field(default=None, alias="agentChatScriptsBarChart")
+    __properties: ClassVar[List[str]] = ["tokenization", "data", "agentChatActivityDiagram", "agentChatToolsBarChart", "agentChatScriptsBarChart"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -61,12 +67,8 @@ class V1LeaderboardReportActualOutputMeta(BaseModel):
         * `None` is only added to the output dict for nullable fields that
           were set at model initialization. Other fields with value `None`
          are ignored.
-        * OpenAPI `readOnly` fields are excluded.
-        * OpenAPI `readOnly` fields are excluded.
         """
         excluded_fields: Set[str] = set([
-            "tokenization",
-            "data",
         ])
 
         _dict = self.model_dump(
@@ -81,6 +83,15 @@ class V1LeaderboardReportActualOutputMeta(BaseModel):
                 if _item_data:
                     _items.append(_item_data.to_dict())
             _dict['data'] = _items
+        # override the default output from pydantic by calling `to_dict()` of agent_chat_activity_diagram
+        if self.agent_chat_activity_diagram:
+            _dict['agentChatActivityDiagram'] = self.agent_chat_activity_diagram.to_dict()
+        # override the default output from pydantic by calling `to_dict()` of agent_chat_tools_bar_chart
+        if self.agent_chat_tools_bar_chart:
+            _dict['agentChatToolsBarChart'] = self.agent_chat_tools_bar_chart.to_dict()
+        # override the default output from pydantic by calling `to_dict()` of agent_chat_scripts_bar_chart
+        if self.agent_chat_scripts_bar_chart:
+            _dict['agentChatScriptsBarChart'] = self.agent_chat_scripts_bar_chart.to_dict()
         return _dict
 
     @classmethod
@@ -94,7 +105,10 @@ class V1LeaderboardReportActualOutputMeta(BaseModel):
 
         _obj = cls.model_validate({
             "tokenization": obj.get("tokenization"),
-            "data": [V1LeaderboardReportActualOutputData.from_dict(_item) for _item in obj["data"]] if obj.get("data") is not None else None
+            "data": [V1LeaderboardReportActualOutputData.from_dict(_item) for _item in obj["data"]] if obj.get("data") is not None else None,
+            "agentChatActivityDiagram": V1AgentChatActivityDiagram.from_dict(obj["agentChatActivityDiagram"]) if obj.get("agentChatActivityDiagram") is not None else None,
+            "agentChatToolsBarChart": V1AgentChatToolsBarChart.from_dict(obj["agentChatToolsBarChart"]) if obj.get("agentChatToolsBarChart") is not None else None,
+            "agentChatScriptsBarChart": V1AgentChatScriptsBarChart.from_dict(obj["agentChatScriptsBarChart"]) if obj.get("agentChatScriptsBarChart") is not None else None
         }, strict=False)
         return _obj
 
eval_studio_client/api/models/v1_leaderboard_report_result.py
@@ -19,6 +19,7 @@ import json
 
 from pydantic import BaseModel, ConfigDict, Field, StrictFloat, StrictInt, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional, Union
+from eval_studio_client.api.models.v1_human_decision import V1HumanDecision
 from eval_studio_client.api.models.v1_leaderboard_report_actual_output_meta import V1LeaderboardReportActualOutputMeta
 from eval_studio_client.api.models.v1_leaderboard_report_result_relationship import V1LeaderboardReportResultRelationship
 from eval_studio_client.api.models.v1_metric_score import V1MetricScore
@@ -31,22 +32,25 @@ class V1LeaderboardReportResult(BaseModel):
     """ # noqa: E501
     key: Optional[StrictStr] = Field(default=None, description="Output only. Composite unique key of the result formed by the model key and test case key.")
     input: Optional[StrictStr] = Field(default=None, description="Output only. Input prompt or text to be processed.")
-    corpus: Optional[List[StrictStr]] = Field(default=None, description="Output only. Collection of corpus documents to be used during evaluation.")
-    context: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of contextual information or references.")
-    categories: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of categories or labels for classification.")
-    relationships: Optional[List[V1LeaderboardReportResultRelationship]] = Field(default=None, description="Output only. List of relationships or associations between entities.")
+    corpus: Optional[List[StrictStr]] = Field(default=None, description="Output only. Collection of corpus documents to be used during evaluation. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
+    context: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of contextual information or references. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
+    categories: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of categories or labels for classification. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
+    relationships: Optional[List[V1LeaderboardReportResultRelationship]] = Field(default=None, description="Output only. List of relationships or associations between entities. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
     expected_output: Optional[StrictStr] = Field(default=None, description="Output only. Expected output or target result.", alias="expectedOutput")
-    output_constraints: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of constraints that should be applied to the output.", alias="outputConstraints")
-    output_condition: Optional[StrictStr] = Field(default=None, description="Output only. Condition that output should satisfy.", alias="outputCondition")
+    output_constraints: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of constraints that should be applied to the output. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.", alias="outputConstraints")
+    output_condition: Optional[StrictStr] = Field(default=None, description="Output only. Condition that output should satisfy. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.", alias="outputCondition")
     actual_output: Optional[StrictStr] = Field(default=None, description="Output only. Actual output produced by the model.", alias="actualOutput")
-    actual_duration: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Duration of processing in seconds.", alias="actualDuration")
-    cost: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Cost of processing in currency units.")
+    actual_duration: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Duration of processing in seconds. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.", alias="actualDuration")
+    cost: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Cost of processing in currency units. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
     model_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the model used.", alias="modelKey")
     test_case_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the test case.", alias="testCaseKey")
     metrics: Optional[List[V1MetricScore]] = Field(default=None, description="Optional. All metrics values for the result.")
     result_error_message: Optional[StrictStr] = Field(default=None, description="Output only. Error message if processing resulted in failure.", alias="resultErrorMessage")
     actual_output_meta: Optional[List[V1LeaderboardReportActualOutputMeta]] = Field(default=None, description="Output only. Additional metadata about the actual output.", alias="actualOutputMeta")
-    __properties: ClassVar[List[str]] = ["key", "input", "corpus", "context", "categories", "relationships", "expectedOutput", "outputConstraints", "outputCondition", "actualOutput", "actualDuration", "cost", "modelKey", "testCaseKey", "metrics", "resultErrorMessage", "actualOutputMeta"]
+    human_decision: Optional[V1HumanDecision] = Field(default=None, alias="humanDecision")
+    comment: Optional[StrictStr] = Field(default=None, description="Output only. Optional comment about the result.")
+    annotations: Optional[Dict[str, Dict[str, Any]]] = Field(default=None, description="Output only. Annotations associated with the test case result.")
+    __properties: ClassVar[List[str]] = ["key", "input", "corpus", "context", "categories", "relationships", "expectedOutput", "outputConstraints", "outputCondition", "actualOutput", "actualDuration", "cost", "modelKey", "testCaseKey", "metrics", "resultErrorMessage", "actualOutputMeta", "humanDecision", "comment", "annotations"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -94,6 +98,8 @@ class V1LeaderboardReportResult(BaseModel):
         * OpenAPI `readOnly` fields are excluded.
         * OpenAPI `readOnly` fields are excluded.
         * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
         """
         excluded_fields: Set[str] = set([
             "key",
@@ -112,6 +118,8 @@ class V1LeaderboardReportResult(BaseModel):
             "test_case_key",
             "result_error_message",
             "actual_output_meta",
+            "comment",
+            "annotations",
         ])
 
         _dict = self.model_dump(
@@ -168,7 +176,10 @@ class V1LeaderboardReportResult(BaseModel):
             "testCaseKey": obj.get("testCaseKey"),
             "metrics": [V1MetricScore.from_dict(_item) for _item in obj["metrics"]] if obj.get("metrics") is not None else None,
             "resultErrorMessage": obj.get("resultErrorMessage"),
-            "actualOutputMeta": [V1LeaderboardReportActualOutputMeta.from_dict(_item) for _item in obj["actualOutputMeta"]] if obj.get("actualOutputMeta") is not None else None
+            "actualOutputMeta": [V1LeaderboardReportActualOutputMeta.from_dict(_item) for _item in obj["actualOutputMeta"]] if obj.get("actualOutputMeta") is not None else None,
+            "humanDecision": obj.get("humanDecision"),
+            "comment": obj.get("comment"),
+            "annotations": obj.get("annotations")
        }, strict=False)
         return _obj
 
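A sketch of consuming the new review fields on a report result; the payload is invented and only shows the fields this release adds.

from eval_studio_client.api.models.v1_human_decision import V1HumanDecision
from eval_studio_client.api.models.v1_leaderboard_report_result import V1LeaderboardReportResult

# Hypothetical result entry carrying the new human-review metadata.
result = V1LeaderboardReportResult.from_dict({
    "actualOutput": "The answer is 42.",
    "humanDecision": "HUMAN_DECISION_APPROVED",
    "comment": "Verified against the source document.",
    "annotations": {"reviewer": {"name": "alice"}},
})
assert result.human_decision is V1HumanDecision.HUMAN_DECISION_APPROVED
# comment and annotations are Output only: from_dict reads them, but to_dict
# drops them again because they sit in excluded_fields.
assert "comment" not in result.to_dict()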
eval_studio_client/api/models/v1_leaderboard_report_result_view.py
@@ -0,0 +1,38 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import json
+from enum import Enum
+from typing_extensions import Self
+
+
+class V1LeaderboardReportResultView(str, Enum):
+    """
+    - LEADERBOARD_REPORT_RESULT_VIEW_UNSPECIFIED: Default value. The basic view with essential fields. - LEADERBOARD_REPORT_RESULT_VIEW_FULL: Full view with all fields populated. - LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY: Summary view with only key fields.
+    """
+
+    """
+    allowed enum values
+    """
+    LEADERBOARD_REPORT_RESULT_VIEW_UNSPECIFIED = 'LEADERBOARD_REPORT_RESULT_VIEW_UNSPECIFIED'
+    LEADERBOARD_REPORT_RESULT_VIEW_FULL = 'LEADERBOARD_REPORT_RESULT_VIEW_FULL'
+    LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY = 'LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY'
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Self:
+        """Create an instance of V1LeaderboardReportResultView from a JSON string"""
+        return cls(json.loads(json_str))
+
+