eval-studio-client 1.2.5__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. eval_studio_client/api/__init__.py +65 -0
  2. eval_studio_client/api/api/__init__.py +3 -0
  3. eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
  4. eval_studio_client/api/api/dashboard_service_api.py +16 -16
  5. eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
  6. eval_studio_client/api/api/document_service_api.py +16 -16
  7. eval_studio_client/api/api/evaluation_service_api.py +12 -12
  8. eval_studio_client/api/api/evaluator_service_api.py +16 -16
  9. eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
  10. eval_studio_client/api/api/leaderboard_service_api.py +554 -16
  11. eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
  12. eval_studio_client/api/api/model_service_api.py +16 -16
  13. eval_studio_client/api/api/operation_service_api.py +821 -17
  14. eval_studio_client/api/api/perturbator_service_api.py +22 -22
  15. eval_studio_client/api/api/test_case_service_api.py +300 -16
  16. eval_studio_client/api/api/test_class_service_api.py +16 -16
  17. eval_studio_client/api/api/test_service_api.py +285 -16
  18. eval_studio_client/api/api/workflow_node_service_api.py +16 -16
  19. eval_studio_client/api/api/workflow_service_api.py +16 -16
  20. eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
  21. eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
  22. eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
  23. eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
  24. eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
  25. eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
  26. eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
  27. eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
  28. eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
  29. eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
  30. eval_studio_client/api/docs/ModelServiceApi.md +5 -5
  31. eval_studio_client/api/docs/OperationServiceApi.md +215 -8
  32. eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
  33. eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
  34. eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
  35. eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
  36. eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
  37. eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
  38. eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
  39. eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
  40. eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
  41. eval_studio_client/api/docs/TestServiceApi.md +73 -5
  42. eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
  43. eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
  44. eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
  45. eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
  46. eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
  47. eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
  48. eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
  49. eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
  50. eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
  51. eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
  52. eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
  53. eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
  54. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
  55. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
  56. eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
  57. eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
  58. eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
  59. eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
  60. eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
  61. eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
  62. eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
  63. eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
  64. eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
  65. eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
  66. eval_studio_client/api/docs/V1DashboardReport.md +31 -0
  67. eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
  68. eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
  69. eval_studio_client/api/docs/V1DataFragment.md +31 -0
  70. eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
  71. eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
  72. eval_studio_client/api/docs/V1DiffItem.md +36 -0
  73. eval_studio_client/api/docs/V1EvaluationType.md +12 -0
  74. eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
  75. eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
  76. eval_studio_client/api/docs/V1HumanDecision.md +12 -0
  77. eval_studio_client/api/docs/V1Info.md +1 -0
  78. eval_studio_client/api/docs/V1Leaderboard.md +1 -0
  79. eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
  80. eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
  81. eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
  82. eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
  83. eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
  84. eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
  85. eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
  86. eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
  87. eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
  88. eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
  89. eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
  90. eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
  91. eval_studio_client/api/docs/V1Metric.md +30 -0
  92. eval_studio_client/api/docs/V1MetricAverage.md +36 -0
  93. eval_studio_client/api/docs/V1MetricMeta.md +40 -0
  94. eval_studio_client/api/docs/V1MetricScore.md +1 -1
  95. eval_studio_client/api/docs/V1MetricScores.md +1 -1
  96. eval_studio_client/api/docs/V1ModelType.md +1 -1
  97. eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
  98. eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
  99. eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
  100. eval_studio_client/api/docs/V1Operation.md +1 -0
  101. eval_studio_client/api/docs/V1OperationView.md +12 -0
  102. eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
  103. eval_studio_client/api/docs/V1Stats.md +2 -0
  104. eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
  105. eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
  106. eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
  107. eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
  108. eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
  109. eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
  110. eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
  111. eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
  112. eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
  113. eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
  114. eval_studio_client/api/models/__init__.py +62 -0
  115. eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
  116. eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
  117. eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
  118. eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
  119. eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
  120. eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
  121. eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
  122. eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
  123. eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
  124. eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
  125. eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
  126. eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
  127. eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
  128. eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
  129. eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
  130. eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
  131. eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
  132. eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
  133. eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
  134. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
  135. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
  136. eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
  137. eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
  138. eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
  139. eval_studio_client/api/models/v1_comparison_item.py +130 -0
  140. eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
  141. eval_studio_client/api/models/v1_comparison_result.py +120 -0
  142. eval_studio_client/api/models/v1_comparison_summary.py +91 -0
  143. eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
  144. eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
  145. eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
  146. eval_studio_client/api/models/v1_dashboard_report.py +109 -0
  147. eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
  148. eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
  149. eval_studio_client/api/models/v1_data_fragment.py +91 -0
  150. eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
  151. eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
  152. eval_studio_client/api/models/v1_diff_item.py +137 -0
  153. eval_studio_client/api/models/v1_evaluation_type.py +39 -0
  154. eval_studio_client/api/models/v1_flipped_metric.py +91 -0
  155. eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
  156. eval_studio_client/api/models/v1_human_decision.py +38 -0
  157. eval_studio_client/api/models/v1_info.py +4 -2
  158. eval_studio_client/api/models/v1_leaderboard.py +5 -2
  159. eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
  160. eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
  161. eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
  162. eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
  163. eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
  164. eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
  165. eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
  166. eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
  167. eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
  168. eval_studio_client/api/models/v1_list_operations_response.py +5 -3
  169. eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
  170. eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
  171. eval_studio_client/api/models/v1_metric.py +89 -0
  172. eval_studio_client/api/models/v1_metric_average.py +101 -0
  173. eval_studio_client/api/models/v1_metric_meta.py +109 -0
  174. eval_studio_client/api/models/v1_metric_score.py +6 -1
  175. eval_studio_client/api/models/v1_metric_scores.py +1 -1
  176. eval_studio_client/api/models/v1_model_type.py +2 -1
  177. eval_studio_client/api/models/v1_models_comparisons.py +93 -0
  178. eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
  179. eval_studio_client/api/models/v1_models_overview.py +97 -0
  180. eval_studio_client/api/models/v1_operation.py +6 -2
  181. eval_studio_client/api/models/v1_operation_view.py +38 -0
  182. eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
  183. eval_studio_client/api/models/v1_stats.py +16 -2
  184. eval_studio_client/api/models/v1_technical_metrics.py +96 -0
  185. eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
  186. eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
  187. eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
  188. eval_studio_client/api/models/v1_test_case_result.py +157 -0
  189. eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
  190. eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
  191. eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
  192. eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
  193. eval_studio_client/api/models/v1_workflow_type.py +1 -0
  194. eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
  195. eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
  196. eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
  197. eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
  198. eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
  199. eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
  200. eval_studio_client/api/test/test_operation_service_api.py +18 -0
  201. eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
  202. eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
  203. eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
  204. eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
  205. eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
  206. eval_studio_client/api/test/test_test_case_service_api.py +6 -0
  207. eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
  208. eval_studio_client/api/test/test_test_service_api.py +6 -0
  209. eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
  210. eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
  211. eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
  212. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
  213. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
  214. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
  215. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
  216. eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
  217. eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
  218. eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
  219. eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
  220. eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
  221. eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
  222. eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
  223. eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
  224. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
  225. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
  226. eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
  227. eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
  228. eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
  229. eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
  230. eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
  231. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
  232. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
  233. eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
  234. eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
  235. eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
  236. eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
  237. eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
  238. eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
  239. eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
  240. eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
  241. eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
  242. eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
  243. eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
  244. eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
  245. eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
  246. eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
  247. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
  248. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
  249. eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
  250. eval_studio_client/api/test/test_v1_diff_item.py +226 -0
  251. eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
  252. eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
  253. eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
  254. eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
  255. eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
  256. eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
  257. eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
  258. eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
  259. eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
  260. eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
  261. eval_studio_client/api/test/test_v1_human_decision.py +33 -0
  262. eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
  263. eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
  264. eval_studio_client/api/test/test_v1_info.py +4 -1
  265. eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
  266. eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
  267. eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
  268. eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
  269. eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
  270. eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
  271. eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
  272. eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
  273. eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
  274. eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
  275. eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
  276. eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
  277. eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
  278. eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
  279. eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
  280. eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
  281. eval_studio_client/api/test/test_v1_metric.py +52 -0
  282. eval_studio_client/api/test/test_v1_metric_average.py +58 -0
  283. eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
  284. eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
  285. eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
  286. eval_studio_client/api/test/test_v1_models_overview.py +60 -0
  287. eval_studio_client/api/test/test_v1_operation.py +2 -1
  288. eval_studio_client/api/test/test_v1_operation_view.py +33 -0
  289. eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
  290. eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
  291. eval_studio_client/api/test/test_v1_stats.py +3 -1
  292. eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
  293. eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
  294. eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
  295. eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
  296. eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
  297. eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
  298. eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
  299. eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
  300. eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
  301. eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
  302. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
  303. eval_studio_client/models.py +18 -6
  304. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0.dist-info}/METADATA +2 -2
  305. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0.dist-info}/RECORD +306 -111
  306. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0.dist-info}/WHEEL +0 -0
eval_studio_client/api/models/v1_comparison_result.py
@@ -0,0 +1,120 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_diff_item import V1DiffItem
+from eval_studio_client.api.models.v1_leaderboard_info import V1LeaderboardInfo
+from eval_studio_client.api.models.v1_metric_meta import V1MetricMeta
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1ComparisonResult(BaseModel):
+    """
+    V1ComparisonResult
+    """ # noqa: E501
+    diffs: Optional[List[V1DiffItem]] = Field(default=None, description="List of differences between leaderboards.")
+    leaderboards: Optional[List[V1LeaderboardInfo]] = Field(default=None, description="Leaderboard information.")
+    metrics_meta: Optional[Dict[str, V1MetricMeta]] = Field(default=None, description="Metadata about metrics.", alias="metricsMeta")
+    __properties: ClassVar[List[str]] = ["diffs", "leaderboards", "metricsMeta"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1ComparisonResult from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of each item in diffs (list)
+        _items = []
+        if self.diffs:
+            for _item_diffs in self.diffs:
+                if _item_diffs:
+                    _items.append(_item_diffs.to_dict())
+            _dict['diffs'] = _items
+        # override the default output from pydantic by calling `to_dict()` of each item in leaderboards (list)
+        _items = []
+        if self.leaderboards:
+            for _item_leaderboards in self.leaderboards:
+                if _item_leaderboards:
+                    _items.append(_item_leaderboards.to_dict())
+            _dict['leaderboards'] = _items
+        # override the default output from pydantic by calling `to_dict()` of each value in metrics_meta (dict)
+        _field_dict = {}
+        if self.metrics_meta:
+            for _key_metrics_meta in self.metrics_meta:
+                if self.metrics_meta[_key_metrics_meta]:
+                    _field_dict[_key_metrics_meta] = self.metrics_meta[_key_metrics_meta].to_dict()
+            _dict['metricsMeta'] = _field_dict
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1ComparisonResult from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "diffs": [V1DiffItem.from_dict(_item) for _item in obj["diffs"]] if obj.get("diffs") is not None else None,
+            "leaderboards": [V1LeaderboardInfo.from_dict(_item) for _item in obj["leaderboards"]] if obj.get("leaderboards") is not None else None,
+            "metricsMeta": dict(
+                (_k, V1MetricMeta.from_dict(_v))
+                for _k, _v in obj["metricsMeta"].items()
+            )
+            if obj.get("metricsMeta") is not None
+            else None
+        }, strict=False)
+        return _obj
+
+
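A minimal usage sketch for the new V1ComparisonResult model above (not part of the diff). It relies only on what the hunk itself shows — the from_json/to_dict helpers and the metricsMeta alias; the nested V1DiffItem, V1LeaderboardInfo, and V1MetricMeta shapes are defined in other files of this release, so the collections stay empty here.

from eval_studio_client.api.models.v1_comparison_result import V1ComparisonResult

# Wire-format payload as a service would return it; "metricsMeta" maps
# onto the snake_case `metrics_meta` attribute via the Field alias.
payload = '{"diffs": [], "leaderboards": [], "metricsMeta": {}}'
result = V1ComparisonResult.from_json(payload)

print(result.metrics_meta)  # -> {}
print(result.to_dict())     # serialized back under the camelCase aliases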
eval_studio_client/api/models/v1_comparison_summary.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1ComparisonSummary(BaseModel):
+    """
+    V1ComparisonSummary
+    """ # noqa: E501
+    recommendation_winner: Optional[StrictStr] = Field(default=None, description="Winner of the comparison (baseline, current, or tie).", alias="recommendationWinner")
+    recommendation: Optional[StrictStr] = Field(default=None, description="Recommendation text.")
+    recommendation_confidence: Optional[StrictStr] = Field(default=None, description="Confidence level of the recommendation.", alias="recommendationConfidence")
+    __properties: ClassVar[List[str]] = ["recommendationWinner", "recommendation", "recommendationConfidence"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1ComparisonSummary from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1ComparisonSummary from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "recommendationWinner": obj.get("recommendationWinner"),
+            "recommendation": obj.get("recommendation"),
+            "recommendationConfidence": obj.get("recommendationConfidence")
+        }, strict=False)
+        return _obj
+
+
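Because the model sets populate_by_name=True, callers can construct V1ComparisonSummary with the snake_case field names while the JSON side keeps the camelCase aliases. A sketch of the round trip (the field values are illustrative; the descriptions above say the winner is one of baseline, current, or tie):

from eval_studio_client.api.models.v1_comparison_summary import V1ComparisonSummary

summary = V1ComparisonSummary(
    recommendation_winner="baseline",
    recommendation="Keep the baseline leaderboard.",
    recommendation_confidence="high",
)
# to_json emits the camelCase aliases; from_json accepts them back.
restored = V1ComparisonSummary.from_json(summary.to_json())
assert restored == summary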
eval_studio_client/api/models/v1_create_evaluation_request.py
@@ -20,6 +20,7 @@ import json
 from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
 from typing import Any, ClassVar, Dict, List, Optional
 from eval_studio_client.api.models.v1_evaluation_test import V1EvaluationTest
+from eval_studio_client.api.models.v1_evaluation_type import V1EvaluationType
 from eval_studio_client.api.models.v1_model import V1Model
 from typing import Optional, Set
 from typing_extensions import Self
@@ -38,7 +39,8 @@ class V1CreateEvaluationRequest(BaseModel):
     model_parameters: Optional[StrictStr] = Field(default=None, description="Optional. Parameters overrides in JSON format.", alias="modelParameters")
     h2ogpte_collection: Optional[StrictStr] = Field(default=None, description="The existing collection name in H2OGPTe.", alias="h2ogpteCollection")
     default_h2ogpte_model: Optional[V1Model] = Field(default=None, alias="defaultH2ogpteModel")
-    __properties: ClassVar[List[str]] = ["evaluatorIdentifiers", "model", "evaluationTests", "operation", "llmModels", "useCache", "evaluatorsParameters", "modelParameters", "h2ogpteCollection", "defaultH2ogpteModel"]
+    evaluation_type: Optional[V1EvaluationType] = Field(default=None, alias="evaluationType")
+    __properties: ClassVar[List[str]] = ["evaluatorIdentifiers", "model", "evaluationTests", "operation", "llmModels", "useCache", "evaluatorsParameters", "modelParameters", "h2ogpteCollection", "defaultH2ogpteModel", "evaluationType"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -113,7 +115,8 @@ class V1CreateEvaluationRequest(BaseModel):
             "evaluatorsParameters": obj.get("evaluatorsParameters"),
             "modelParameters": obj.get("modelParameters"),
             "h2ogpteCollection": obj.get("h2ogpteCollection"),
-            "defaultH2ogpteModel": V1Model.from_dict(obj["defaultH2ogpteModel"]) if obj.get("defaultH2ogpteModel") is not None else None
+            "defaultH2ogpteModel": V1Model.from_dict(obj["defaultH2ogpteModel"]) if obj.get("defaultH2ogpteModel") is not None else None,
+            "evaluationType": obj.get("evaluationType")
         }, strict=False)
         return _obj
 
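The three hunks above add a single optional field, evaluation_type, serialized as evaluationType (the V1EvaluationType enum itself is defined in v1_evaluation_type.py, not shown in this excerpt). A hedged sketch of the compatibility consequence, assuming the remaining generated fields are optional and that from_dict maps them with the same obj.get(...) pattern as the lines shown:

from eval_studio_client.api.models.v1_create_evaluation_request import (
    V1CreateEvaluationRequest,
)

# A 1.2.5-era payload simply omits "evaluationType": from_dict leaves
# evaluation_type as None, and to_dict (exclude_none=True) drops it
# again, so older clients and payloads keep working unchanged.
req = V1CreateEvaluationRequest.from_dict({"useCache": True})
assert req.evaluation_type is None
assert "evaluationType" not in req.to_dict()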
eval_studio_client/api/models/v1_create_test_from_test_cases_request.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1CreateTestFromTestCasesRequest(BaseModel):
+    """
+    V1CreateTestFromTestCasesRequest
+    """ # noqa: E501
+    tests_json: Optional[StrictStr] = Field(default=None, description="Test Cases in JSON format.", alias="testsJson")
+    url: Optional[StrictStr] = Field(default=None, description="URL pointing to the Test Cases in JSON format to import.")
+    test_display_name: Optional[StrictStr] = Field(default=None, description="Required. Display name of the newly created Test.", alias="testDisplayName")
+    test_description: Optional[StrictStr] = Field(default=None, description="Optional. Description of the newly created Tests.", alias="testDescription")
+    __properties: ClassVar[List[str]] = ["testsJson", "url", "testDisplayName", "testDescription"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesRequest from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesRequest from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "testsJson": obj.get("testsJson"),
+            "url": obj.get("url"),
+            "testDisplayName": obj.get("testDisplayName"),
+            "testDescription": obj.get("testDescription")
+        }, strict=False)
+        return _obj
+
+
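A sketch of building the new request (not part of the diff). testsJson and url are alternative sources for the imported test cases; the exact test-case JSON schema lives elsewhere in the package, so the inline body below is a hypothetical placeholder:

from eval_studio_client.api.models.v1_create_test_from_test_cases_request import (
    V1CreateTestFromTestCasesRequest,
)

req = V1CreateTestFromTestCasesRequest(
    tests_json='[{"prompt": "What is H2O?"}]',  # hypothetical test-case shape
    test_display_name="Smoke tests",            # required per the field description
    test_description="Imported from an ad-hoc JSON export.",
)
print(req.to_json())  # keys appear as testsJson, testDisplayName, ...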
eval_studio_client/api/models/v1_create_test_from_test_cases_response.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_test import V1Test
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1CreateTestFromTestCasesResponse(BaseModel):
+    """
+    V1CreateTestFromTestCasesResponse
+    """ # noqa: E501
+    test: Optional[V1Test] = None
+    __properties: ClassVar[List[str]] = ["test"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesResponse from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of test
+        if self.test:
+            _dict['test'] = self.test.to_dict()
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1CreateTestFromTestCasesResponse from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "test": V1Test.from_dict(obj["test"]) if obj.get("test") is not None else None
+        }, strict=False)
+        return _obj
+
+
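The response is a thin wrapper around an optional V1Test. A small sketch showing the None handling the generated helpers give for free:

from eval_studio_client.api.models.v1_create_test_from_test_cases_response import (
    V1CreateTestFromTestCasesResponse,
)

# An empty payload is valid: test defaults to None and exclude_none
# drops it again on serialization.
resp = V1CreateTestFromTestCasesResponse.from_dict({})
assert resp.test is None
assert resp.to_dict() == {}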
eval_studio_client/api/models/v1_dashboard_report.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_dashboard_report_result import V1DashboardReportResult
+from eval_studio_client.api.models.v1_leaderboard_report_evaluator import V1LeaderboardReportEvaluator
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1DashboardReport(BaseModel):
+    """
+    DashboardReport represents the dashboard report which is formed by the results, models and evaluator.
+    """ # noqa: E501
+    results: Optional[List[V1DashboardReportResult]] = Field(default=None, description="Output only. List of per test case results.")
+    evaluator: Optional[List[V1LeaderboardReportEvaluator]] = Field(default=None, description="Output only. Details of the evaluators which evaluated the model outputs to create the results.")
+    __properties: ClassVar[List[str]] = ["results", "evaluator"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1DashboardReport from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        """
+        excluded_fields: Set[str] = set([
+            "results",
+            "evaluator",
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of each item in results (list)
+        _items = []
+        if self.results:
+            for _item_results in self.results:
+                if _item_results:
+                    _items.append(_item_results.to_dict())
+            _dict['results'] = _items
+        # override the default output from pydantic by calling `to_dict()` of each item in evaluator (list)
+        _items = []
+        if self.evaluator:
+            for _item_evaluator in self.evaluator:
+                if _item_evaluator:
+                    _items.append(_item_evaluator.to_dict())
+            _dict['evaluator'] = _items
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1DashboardReport from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "results": [V1DashboardReportResult.from_dict(_item) for _item in obj["results"]] if obj.get("results") is not None else None,
+            "evaluator": [V1LeaderboardReportEvaluator.from_dict(_item) for _item in obj["evaluator"]] if obj.get("evaluator") is not None else None
+        }, strict=False)
+        return _obj
+
+
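Both fields of V1DashboardReport are marked Output only: from_dict parses them out of server responses, but they sit in excluded_fields, so an empty report does not re-send them. A minimal sketch:

from eval_studio_client.api.models.v1_dashboard_report import V1DashboardReport

report = V1DashboardReport.from_dict({"results": [], "evaluator": []})
assert report.results == [] and report.evaluator == []
# model_dump excludes the read-only fields, and the to_dict override
# only re-adds them when they are non-empty.
assert report.to_dict() == {}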
eval_studio_client/api/models/v1_dashboard_report_result.py
@@ -0,0 +1,139 @@
+# coding: utf-8
+
+"""
+    ai/h2o/eval_studio/v1/insight.proto
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: version not set
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+""" # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from eval_studio_client.api.models.v1_human_decision import V1HumanDecision
+from eval_studio_client.api.models.v1_metric_scores import V1MetricScores
+from typing import Optional, Set
+from typing_extensions import Self
+
+class V1DashboardReportResult(BaseModel):
+    """
+    V1DashboardReportResult
+    """ # noqa: E501
+    key: Optional[StrictStr] = Field(default=None, description="Output only. Composite unique key of the result formed by the model key and test case key.")
+    input: Optional[StrictStr] = Field(default=None, description="Output only. Input prompt or text to be processed.")
+    expected_output: Optional[StrictStr] = Field(default=None, description="Output only. Expected output or target result.", alias="expectedOutput")
+    actual_output: Optional[StrictStr] = Field(default=None, description="Output only. Actual output produced by the model.", alias="actualOutput")
+    model_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the model used.", alias="modelKey")
+    test_case_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the test case.", alias="testCaseKey")
+    metrics: Optional[Dict[str, V1MetricScores]] = Field(default=None, description="Optional. All metrics values for the result. Maps evaluator ID to MetricScore.")
+    result_error_map: Optional[Dict[str, StrictStr]] = Field(default=None, description="Output only. Error message if processing resulted in failure. Maps evaluator ID to error message.", alias="resultErrorMap")
+    human_decision: Optional[V1HumanDecision] = Field(default=None, alias="humanDecision")
+    comment: Optional[StrictStr] = Field(default=None, description="Output only. Optional comment about the result.")
+    annotations: Optional[Dict[str, Dict[str, Any]]] = Field(default=None, description="Output only. Additional annotations for the result.")
+    __properties: ClassVar[List[str]] = ["key", "input", "expectedOutput", "actualOutput", "modelKey", "testCaseKey", "metrics", "resultErrorMap", "humanDecision", "comment", "annotations"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of V1DashboardReportResult from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        * OpenAPI `readOnly` fields are excluded.
+        """
+        excluded_fields: Set[str] = set([
+            "key",
+            "input",
+            "expected_output",
+            "actual_output",
+            "model_key",
+            "test_case_key",
+            "result_error_map",
+            "comment",
+            "annotations",
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # override the default output from pydantic by calling `to_dict()` of each value in metrics (dict)
+        _field_dict = {}
+        if self.metrics:
+            for _key_metrics in self.metrics:
+                if self.metrics[_key_metrics]:
+                    _field_dict[_key_metrics] = self.metrics[_key_metrics].to_dict()
+            _dict['metrics'] = _field_dict
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of V1DashboardReportResult from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj, strict=False)
+
+        _obj = cls.model_validate({
+            "key": obj.get("key"),
+            "input": obj.get("input"),
+            "expectedOutput": obj.get("expectedOutput"),
+            "actualOutput": obj.get("actualOutput"),
+            "modelKey": obj.get("modelKey"),
+            "testCaseKey": obj.get("testCaseKey"),
+            "metrics": dict(
+                (_k, V1MetricScores.from_dict(_v))
+                for _k, _v in obj["metrics"].items()
+            )
+            if obj.get("metrics") is not None
+            else None,
+            "resultErrorMap": obj.get("resultErrorMap"),
+            "humanDecision": obj.get("humanDecision"),
+            "comment": obj.get("comment"),
+            "annotations": obj.get("annotations")
+        }, strict=False)
+        return _obj
+
+
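Finally, a sketch of reading one per-test-case row (the payload values are illustrative). All the scalar fields below are Output only and listed in excluded_fields, so to_dict() omits them; only the writable fields such as metrics and humanDecision survive serialization:

from eval_studio_client.api.models.v1_dashboard_report_result import (
    V1DashboardReportResult,
)

row = V1DashboardReportResult.from_dict({
    "key": "model-1/test-case-7",  # hypothetical composite key
    "input": "What is H2O?",
    "actualOutput": "H2O is water.",
    "resultErrorMap": {},
})
print(row.actual_output)                   # -> "H2O is water."
assert "actualOutput" not in row.to_dict() # read-only fields are dropped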