eval-studio-client 1.2.5__py3-none-any.whl → 1.3.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_studio_client/api/__init__.py +65 -0
- eval_studio_client/api/api/__init__.py +3 -0
- eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
- eval_studio_client/api/api/dashboard_service_api.py +16 -16
- eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/document_service_api.py +16 -16
- eval_studio_client/api/api/evaluation_service_api.py +12 -12
- eval_studio_client/api/api/evaluator_service_api.py +16 -16
- eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
- eval_studio_client/api/api/leaderboard_service_api.py +554 -16
- eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
- eval_studio_client/api/api/model_service_api.py +16 -16
- eval_studio_client/api/api/operation_service_api.py +821 -17
- eval_studio_client/api/api/perturbator_service_api.py +22 -22
- eval_studio_client/api/api/test_case_service_api.py +300 -16
- eval_studio_client/api/api/test_class_service_api.py +16 -16
- eval_studio_client/api/api/test_service_api.py +285 -16
- eval_studio_client/api/api/workflow_node_service_api.py +16 -16
- eval_studio_client/api/api/workflow_service_api.py +16 -16
- eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
- eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
- eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
- eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
- eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
- eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
- eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
- eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
- eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
- eval_studio_client/api/docs/ModelServiceApi.md +5 -5
- eval_studio_client/api/docs/OperationServiceApi.md +215 -8
- eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
- eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
- eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
- eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
- eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
- eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
- eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
- eval_studio_client/api/docs/TestServiceApi.md +73 -5
- eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
- eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
- eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
- eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
- eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
- eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
- eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
- eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
- eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
- eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
- eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
- eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
- eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
- eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
- eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
- eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
- eval_studio_client/api/docs/V1DashboardReport.md +31 -0
- eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
- eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1DataFragment.md +31 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
- eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
- eval_studio_client/api/docs/V1DiffItem.md +36 -0
- eval_studio_client/api/docs/V1EvaluationType.md +12 -0
- eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
- eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
- eval_studio_client/api/docs/V1HumanDecision.md +12 -0
- eval_studio_client/api/docs/V1Info.md +1 -0
- eval_studio_client/api/docs/V1Leaderboard.md +1 -0
- eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
- eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
- eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
- eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
- eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
- eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
- eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
- eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
- eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
- eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
- eval_studio_client/api/docs/V1Metric.md +30 -0
- eval_studio_client/api/docs/V1MetricAverage.md +36 -0
- eval_studio_client/api/docs/V1MetricMeta.md +40 -0
- eval_studio_client/api/docs/V1MetricScore.md +1 -1
- eval_studio_client/api/docs/V1MetricScores.md +1 -1
- eval_studio_client/api/docs/V1ModelType.md +1 -1
- eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
- eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
- eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
- eval_studio_client/api/docs/V1Operation.md +1 -0
- eval_studio_client/api/docs/V1OperationView.md +12 -0
- eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
- eval_studio_client/api/docs/V1Stats.md +2 -0
- eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
- eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
- eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
- eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
- eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
- eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
- eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
- eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
- eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
- eval_studio_client/api/models/__init__.py +62 -0
- eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
- eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
- eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
- eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
- eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
- eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
- eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
- eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
- eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
- eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
- eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
- eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
- eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
- eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
- eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
- eval_studio_client/api/models/v1_comparison_item.py +130 -0
- eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
- eval_studio_client/api/models/v1_comparison_result.py +120 -0
- eval_studio_client/api/models/v1_comparison_summary.py +91 -0
- eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
- eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
- eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
- eval_studio_client/api/models/v1_dashboard_report.py +109 -0
- eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
- eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_data_fragment.py +91 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
- eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
- eval_studio_client/api/models/v1_diff_item.py +137 -0
- eval_studio_client/api/models/v1_evaluation_type.py +39 -0
- eval_studio_client/api/models/v1_flipped_metric.py +91 -0
- eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
- eval_studio_client/api/models/v1_human_decision.py +38 -0
- eval_studio_client/api/models/v1_info.py +4 -2
- eval_studio_client/api/models/v1_leaderboard.py +5 -2
- eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
- eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
- eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
- eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
- eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
- eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
- eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
- eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
- eval_studio_client/api/models/v1_list_operations_response.py +5 -3
- eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
- eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
- eval_studio_client/api/models/v1_metric.py +89 -0
- eval_studio_client/api/models/v1_metric_average.py +101 -0
- eval_studio_client/api/models/v1_metric_meta.py +109 -0
- eval_studio_client/api/models/v1_metric_score.py +6 -1
- eval_studio_client/api/models/v1_metric_scores.py +1 -1
- eval_studio_client/api/models/v1_model_type.py +2 -1
- eval_studio_client/api/models/v1_models_comparisons.py +93 -0
- eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
- eval_studio_client/api/models/v1_models_overview.py +97 -0
- eval_studio_client/api/models/v1_operation.py +6 -2
- eval_studio_client/api/models/v1_operation_view.py +38 -0
- eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
- eval_studio_client/api/models/v1_stats.py +16 -2
- eval_studio_client/api/models/v1_technical_metrics.py +96 -0
- eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
- eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
- eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
- eval_studio_client/api/models/v1_test_case_result.py +157 -0
- eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
- eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
- eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
- eval_studio_client/api/models/v1_workflow_type.py +1 -0
- eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
- eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
- eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
- eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
- eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
- eval_studio_client/api/test/test_operation_service_api.py +18 -0
- eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
- eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
- eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
- eval_studio_client/api/test/test_test_case_service_api.py +6 -0
- eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
- eval_studio_client/api/test/test_test_service_api.py +6 -0
- eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
- eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
- eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
- eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
- eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
- eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
- eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
- eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
- eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
- eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
- eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
- eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
- eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
- eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
- eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
- eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
- eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
- eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
- eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
- eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
- eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_diff_item.py +226 -0
- eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
- eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
- eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
- eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
- eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
- eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
- eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
- eval_studio_client/api/test/test_v1_human_decision.py +33 -0
- eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
- eval_studio_client/api/test/test_v1_info.py +4 -1
- eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
- eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
- eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
- eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
- eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
- eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
- eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
- eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
- eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
- eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
- eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
- eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
- eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
- eval_studio_client/api/test/test_v1_metric.py +52 -0
- eval_studio_client/api/test/test_v1_metric_average.py +58 -0
- eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
- eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
- eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
- eval_studio_client/api/test/test_v1_models_overview.py +60 -0
- eval_studio_client/api/test/test_v1_operation.py +2 -1
- eval_studio_client/api/test/test_v1_operation_view.py +33 -0
- eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
- eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
- eval_studio_client/api/test/test_v1_stats.py +3 -1
- eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
- eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
- eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
- eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
- eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
- eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
- eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
- eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
- eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
- eval_studio_client/models.py +18 -6
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/METADATA +2 -2
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/RECORD +306 -111
- {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/WHEEL +0 -0
eval_studio_client/api/models/v1_comparison_result.py

@@ -0,0 +1,120 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_diff_item import V1DiffItem
+from eval_studio_client.api.models.v1_leaderboard_info import V1LeaderboardInfo
+from eval_studio_client.api.models.v1_metric_meta import V1MetricMeta
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1ComparisonResult(BaseModel):
+    """
+    V1ComparisonResult
+    """ # noqa: E501
+    diffs: Optional[List[V1DiffItem]] = Field(default=None, description="List of differences between leaderboards.")
+    leaderboards: Optional[List[V1LeaderboardInfo]] = Field(default=None, description="Leaderboard information.")
+    metrics_meta: Optional[Dict[str, V1MetricMeta]] = Field(default=None, description="Metadata about metrics.", alias="metricsMeta")
+    __properties: ClassVar[List[str]] = ["diffs", "leaderboards", "metricsMeta"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1ComparisonResult from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of each item in diffs (list)
+        _items = []
+        if self.diffs:
+            for _item_diffs in self.diffs:
+                if _item_diffs:
+                    _items.append(_item_diffs.to_dict())
+            _dict['diffs'] = _items
+        # override the default output from pydantic by calling `to_dict()` of each item in leaderboards (list)
+        _items = []
+        if self.leaderboards:
+            for _item_leaderboards in self.leaderboards:
+                if _item_leaderboards:
+                    _items.append(_item_leaderboards.to_dict())
+            _dict['leaderboards'] = _items
+        # override the default output from pydantic by calling `to_dict()` of each value in metrics_meta (dict)
+        _field_dict = {}
+        if self.metrics_meta:
+            for _key_metrics_meta in self.metrics_meta:
+                if self.metrics_meta[_key_metrics_meta]:
+                    _field_dict[_key_metrics_meta] = self.metrics_meta[_key_metrics_meta].to_dict()
+            _dict['metricsMeta'] = _field_dict
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1ComparisonResult from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "diffs": [V1DiffItem.from_dict(_item) for _item in obj["diffs"]] if obj.get("diffs") is not None else None,
+            "leaderboards": [V1LeaderboardInfo.from_dict(_item) for _item in obj["leaderboards"]] if obj.get("leaderboards") is not None else None,
+            "metricsMeta": dict(
+                (_k, V1MetricMeta.from_dict(_v))
+                for _k, _v in obj["metricsMeta"].items()
+            )
+            if obj.get("metricsMeta") is not None
+            else None
+        }, strict=False)
+        return _obj
+
+
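These generated comparison models follow the usual OpenAPI-generator pydantic v2 pattern: snake_case attributes mapped to camelCase JSON aliases, plus `from_json`/`from_dict` and `to_json`/`to_dict` helpers that recurse into nested models. A minimal round-trip sketch using only what this hunk shows; the payload is hypothetical and leaves the nested `V1DiffItem`/`V1LeaderboardInfo`/`V1MetricMeta` entries empty because their fields are defined elsewhere in this release:

```python
from eval_studio_client.api.models.v1_comparison_result import V1ComparisonResult

# Hypothetical payload; the nested diff/leaderboard/metric entries are left
# empty because their schemas are not part of this hunk.
payload = '{"diffs": [], "leaderboards": [], "metricsMeta": {}}'

result = V1ComparisonResult.from_json(payload)
print(result.metrics_meta)  # {} -- read back via the snake_case attribute
print(result.to_dict())     # re-serialized under the camelCase alias "metricsMeta"
```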
eval_studio_client/api/models/v1_comparison_summary.py

@@ -0,0 +1,91 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1ComparisonSummary(BaseModel):
+    """
+    V1ComparisonSummary
+    """ # noqa: E501
+    recommendation_winner: Optional[StrictStr] = Field(default=None, description="Winner of the comparison (baseline, current, or tie).", alias="recommendationWinner")
+    recommendation: Optional[StrictStr] = Field(default=None, description="Recommendation text.")
+    recommendation_confidence: Optional[StrictStr] = Field(default=None, description="Confidence level of the recommendation.", alias="recommendationConfidence")
+    __properties: ClassVar[List[str]] = ["recommendationWinner", "recommendation", "recommendationConfidence"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1ComparisonSummary from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1ComparisonSummary from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "recommendationWinner": obj.get("recommendationWinner"),
+            "recommendation": obj.get("recommendation"),
+            "recommendationConfidence": obj.get("recommendationConfidence")
+        }, strict=False)
+        return _obj
+
+
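Because `model_config` sets `populate_by_name=True`, the summary can be constructed with either the Python field names or the JSON aliases, while `to_json` always emits the aliases. A small sketch with made-up values:

```python
from eval_studio_client.api.models.v1_comparison_summary import V1ComparisonSummary

# Built with the Python field names; "baseline" and "high" are made-up example values.
summary = V1ComparisonSummary(
    recommendation_winner="baseline",
    recommendation="Keep the baseline leaderboard.",
    recommendation_confidence="high",
)

as_json = summary.to_json()   # keys appear as "recommendationWinner", etc.
restored = V1ComparisonSummary.from_json(as_json)
assert restored.recommendation_winner == "baseline"
```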
eval_studio_client/api/models/v1_create_evaluation_request.py

@@ -20,6 +20,7 @@ import json
 from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
 from eval_studio_client.api.models.v1_evaluation_test import V1EvaluationTest
+from eval_studio_client.api.models.v1_evaluation_type import V1EvaluationType
 from eval_studio_client.api.models.v1_model import V1Model
 from typing import Optional, Set
 from typing_extensions import Self
@@ -38,7 +39,8 @@ class V1CreateEvaluationRequest(BaseModel):
     model_parameters: Optional[StrictStr] = Field(default=None, description="Optional. Parameters overrides in JSON format.", alias="modelParameters")
     h2ogpte_collection: Optional[StrictStr] = Field(default=None, description="The existing collection name in H2OGPTe.", alias="h2ogpteCollection")
     default_h2ogpte_model: Optional[V1Model] = Field(default=None, alias="defaultH2ogpteModel")
-    __properties: ClassVar[List[str]] = ["evaluatorIdentifiers", "model", "evaluationTests", "operation", "llmModels", "useCache", "evaluatorsParameters", "modelParameters", "h2ogpteCollection", "defaultH2ogpteModel"]
+    evaluation_type: Optional[V1EvaluationType] = Field(default=None, alias="evaluationType")
+    __properties: ClassVar[List[str]] = ["evaluatorIdentifiers", "model", "evaluationTests", "operation", "llmModels", "useCache", "evaluatorsParameters", "modelParameters", "h2ogpteCollection", "defaultH2ogpteModel", "evaluationType"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -113,7 +115,8 @@ class V1CreateEvaluationRequest(BaseModel):
             "evaluatorsParameters": obj.get("evaluatorsParameters"),
             "modelParameters": obj.get("modelParameters"),
             "h2ogpteCollection": obj.get("h2ogpteCollection"),
-            "defaultH2ogpteModel": V1Model.from_dict(obj["defaultH2ogpteModel"]) if obj.get("defaultH2ogpteModel") is not None else None
+            "defaultH2ogpteModel": V1Model.from_dict(obj["defaultH2ogpteModel"]) if obj.get("defaultH2ogpteModel") is not None else None,
+            "evaluationType": obj.get("evaluationType")
         }, strict=False)
         return _obj
 
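The only change to `V1CreateEvaluationRequest` is the new optional `evaluation_type` field (JSON key `evaluationType`). A hedged sketch of how an existing caller is unaffected, assuming that, like every field visible in this hunk, the remaining request fields are likewise optional; the parameter values are made up:

```python
from eval_studio_client.api.models.v1_create_evaluation_request import V1CreateEvaluationRequest

# Hypothetical request body; only keys visible in the hunk above are supplied.
# "evaluationType" is omitted, so the new optional field simply stays None.
req = V1CreateEvaluationRequest.from_dict({
    "modelParameters": "{\"temperature\": 0}",
    "h2ogpteCollection": "my-collection",
})
print(req.evaluation_type)  # None -- unchanged behaviour for existing callers
```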
eval_studio_client/api/models/v1_create_test_from_test_cases_request.py

@@ -0,0 +1,93 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1CreateTestFromTestCasesRequest(BaseModel):
+    """
+    V1CreateTestFromTestCasesRequest
+    """ # noqa: E501
+    tests_json: Optional[StrictStr] = Field(default=None, description="Test Cases in JSON format.", alias="testsJson")
+    url: Optional[StrictStr] = Field(default=None, description="URL pointing to the Test Cases in JSON format to import.")
+    test_display_name: Optional[StrictStr] = Field(default=None, description="Required. Display name of the newly created Test.", alias="testDisplayName")
+    test_description: Optional[StrictStr] = Field(default=None, description="Optional. Description of the newly created Tests.", alias="testDescription")
+    __properties: ClassVar[List[str]] = ["testsJson", "url", "testDisplayName", "testDescription"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesRequest from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesRequest from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "testsJson": obj.get("testsJson"),
+            "url": obj.get("url"),
+            "testDisplayName": obj.get("testDisplayName"),
+            "testDescription": obj.get("testDescription")
+        }, strict=False)
+        return _obj
+
+
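`V1CreateTestFromTestCasesRequest` is a flat model of optional strings, so the test cases can be supplied either inline via `testsJson` or by reference via `url`. A minimal sketch with made-up values; the JSON schema expected inside `testsJson` is not part of this diff:

```python
import json

from eval_studio_client.api.models.v1_create_test_from_test_cases_request import (
    V1CreateTestFromTestCasesRequest,
)

# Made-up test-case payload; the expected test-case schema is not shown in this hunk.
cases = json.dumps([{"prompt": "What is Eval Studio?", "expected": "An evaluation tool."}])

req = V1CreateTestFromTestCasesRequest(
    tests_json=cases,
    test_display_name="Smoke test",
    test_description="Imported from inline JSON.",
)
print(req.to_json())  # emits "testsJson", "testDisplayName", "testDescription"
```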
eval_studio_client/api/models/v1_create_test_from_test_cases_response.py

@@ -0,0 +1,91 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_test import V1Test
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1CreateTestFromTestCasesResponse(BaseModel):
+    """
+    V1CreateTestFromTestCasesResponse
+    """ # noqa: E501
+    test: Optional[V1Test] = None
+    __properties: ClassVar[List[str]] = ["test"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesResponse from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of test
+        if self.test:
+            _dict['test'] = self.test.to_dict()
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesResponse from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "test": V1Test.from_dict(obj["test"]) if obj.get("test") is not None else None
+        }, strict=False)
+        return _obj
+
+
eval_studio_client/api/models/v1_dashboard_report.py

@@ -0,0 +1,109 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_dashboard_report_result import V1DashboardReportResult
+from eval_studio_client.api.models.v1_leaderboard_report_evaluator import V1LeaderboardReportEvaluator
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1DashboardReport(BaseModel):
+    """
+    DashboardReport represents the dashboard report which is formed by the results, models and evaluator.
+    """ # noqa: E501
+    results: Optional[List[V1DashboardReportResult]] = Field(default=None, description="Output only. List of per test case results.")
+    evaluator: Optional[List[V1LeaderboardReportEvaluator]] = Field(default=None, description="Output only. Details of the evaluators which evaluated the model outputs to create the results.")
+    __properties: ClassVar[List[str]] = ["results", "evaluator"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1DashboardReport from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        """
+        excluded_fields: Set[str] = set([
+            "results",
+            "evaluator",
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of each item in results (list)
+        _items = []
+        if self.results:
+            for _item_results in self.results:
+                if _item_results:
+                    _items.append(_item_results.to_dict())
+            _dict['results'] = _items
+        # override the default output from pydantic by calling `to_dict()` of each item in evaluator (list)
+        _items = []
+        if self.evaluator:
+            for _item_evaluator in self.evaluator:
+                if _item_evaluator:
+                    _items.append(_item_evaluator.to_dict())
+            _dict['evaluator'] = _items
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1DashboardReport from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "results": [V1DashboardReportResult.from_dict(_item) for _item in obj["results"]] if obj.get("results") is not None else None,
+            "evaluator": [V1LeaderboardReportEvaluator.from_dict(_item) for _item in obj["evaluator"]] if obj.get("evaluator") is not None else None
+        }, strict=False)
+        return _obj
+
+
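Both `V1DashboardReport` fields are OpenAPI `readOnly`, which is why `to_dict` lists them in `excluded_fields`: they are populated when parsing a server response but never serialized back. A sketch of that asymmetry, using empty lists so the nested result and evaluator schemas (not shown in this hunk) are not needed:

```python
from eval_studio_client.api.models.v1_dashboard_report import V1DashboardReport

# Server-style payload with (hypothetically) empty result and evaluator lists.
report = V1DashboardReport.from_dict({"results": [], "evaluator": []})

print(report.results)    # [] -- populated when parsing the response
print(report.to_dict())  # {} -- read-only fields are excluded on the way out
```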
eval_studio_client/api/models/v1_dashboard_report_result.py

@@ -0,0 +1,139 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_human_decision import V1HumanDecision
+from eval_studio_client.api.models.v1_metric_scores import V1MetricScores
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1DashboardReportResult(BaseModel):
+    """
+    V1DashboardReportResult
+    """ # noqa: E501
+    key: Optional[StrictStr] = Field(default=None, description="Output only. Composite unique key of the result formed by the model key and test case key.")
+    input: Optional[StrictStr] = Field(default=None, description="Output only. Input prompt or text to be processed.")
+    expected_output: Optional[StrictStr] = Field(default=None, description="Output only. Expected output or target result.", alias="expectedOutput")
+    actual_output: Optional[StrictStr] = Field(default=None, description="Output only. Actual output produced by the model.", alias="actualOutput")
+    model_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the model used.", alias="modelKey")
+    test_case_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the test case.", alias="testCaseKey")
+    metrics: Optional[Dict[str, V1MetricScores]] = Field(default=None, description="Optional. All metrics values for the result. Maps evaluator ID to MetricScore.")
+    result_error_map: Optional[Dict[str, StrictStr]] = Field(default=None, description="Output only. Error message if processing resulted in failure. Maps evaluator ID to error message.", alias="resultErrorMap")
+    human_decision: Optional[V1HumanDecision] = Field(default=None, alias="humanDecision")
+    comment: Optional[StrictStr] = Field(default=None, description="Output only. Optional comment about the result.")
+    annotations: Optional[Dict[str, Dict[str, Any]]] = Field(default=None, description="Output only. Additional annotations for the result.")
+    __properties: ClassVar[List[str]] = ["key", "input", "expectedOutput", "actualOutput", "modelKey", "testCaseKey", "metrics", "resultErrorMap", "humanDecision", "comment", "annotations"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1DashboardReportResult from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        """
+        excluded_fields: Set[str] = set([
+            "key",
+            "input",
+            "expected_output",
+            "actual_output",
+            "model_key",
+            "test_case_key",
+            "result_error_map",
+            "comment",
+            "annotations",
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of each value in metrics (dict)
+        _field_dict = {}
+        if self.metrics:
+            for _key_metrics in self.metrics:
+                if self.metrics[_key_metrics]:
+                    _field_dict[_key_metrics] = self.metrics[_key_metrics].to_dict()
+            _dict['metrics'] = _field_dict
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1DashboardReportResult from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "key": obj.get("key"),
+            "input": obj.get("input"),
+            "expectedOutput": obj.get("expectedOutput"),
+            "actualOutput": obj.get("actualOutput"),
+            "modelKey": obj.get("modelKey"),
+            "testCaseKey": obj.get("testCaseKey"),
+            "metrics": dict(
+                (_k, V1MetricScores.from_dict(_v))
+                for _k, _v in obj["metrics"].items()
+            )
+            if obj.get("metrics") is not None
+            else None,
+            "resultErrorMap": obj.get("resultErrorMap"),
+            "humanDecision": obj.get("humanDecision"),
+            "comment": obj.get("comment"),
+            "annotations": obj.get("annotations")
+        }, strict=False)
+        return _obj
+
+
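`V1DashboardReportResult` is likewise mostly output-only; a typical client-side use is parsing one per-test-case entry out of a dashboard report. A sketch with hypothetical values that sticks to the plain string fields shown above and omits `metrics` and `humanDecision`, whose value types (`V1MetricScores`, `V1HumanDecision`) are defined elsewhere in this release:

```python
from eval_studio_client.api.models.v1_dashboard_report_result import V1DashboardReportResult

# Hypothetical per-test-case entry from a dashboard report payload.
entry = V1DashboardReportResult.from_json(
    '{"key": "model-1/case-1", "modelKey": "model-1", "testCaseKey": "case-1",'
    ' "input": "What is 2 + 2?", "expectedOutput": "4", "actualOutput": "4"}'
)
print(entry.model_key, entry.actual_output)  # read via the snake_case attributes
```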