eval-studio-client 1.2.5__py3-none-any.whl → 1.3.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. eval_studio_client/api/__init__.py +65 -0
  2. eval_studio_client/api/api/__init__.py +3 -0
  3. eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
  4. eval_studio_client/api/api/dashboard_service_api.py +16 -16
  5. eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
  6. eval_studio_client/api/api/document_service_api.py +16 -16
  7. eval_studio_client/api/api/evaluation_service_api.py +12 -12
  8. eval_studio_client/api/api/evaluator_service_api.py +16 -16
  9. eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
  10. eval_studio_client/api/api/leaderboard_service_api.py +554 -16
  11. eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
  12. eval_studio_client/api/api/model_service_api.py +16 -16
  13. eval_studio_client/api/api/operation_service_api.py +821 -17
  14. eval_studio_client/api/api/perturbator_service_api.py +22 -22
  15. eval_studio_client/api/api/test_case_service_api.py +300 -16
  16. eval_studio_client/api/api/test_class_service_api.py +16 -16
  17. eval_studio_client/api/api/test_service_api.py +285 -16
  18. eval_studio_client/api/api/workflow_node_service_api.py +16 -16
  19. eval_studio_client/api/api/workflow_service_api.py +16 -16
  20. eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
  21. eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
  22. eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
  23. eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
  24. eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
  25. eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
  26. eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
  27. eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
  28. eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
  29. eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
  30. eval_studio_client/api/docs/ModelServiceApi.md +5 -5
  31. eval_studio_client/api/docs/OperationServiceApi.md +215 -8
  32. eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
  33. eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
  34. eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
  35. eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
  36. eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
  37. eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
  38. eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
  39. eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
  40. eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
  41. eval_studio_client/api/docs/TestServiceApi.md +73 -5
  42. eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
  43. eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
  44. eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
  45. eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
  46. eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
  47. eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
  48. eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
  49. eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
  50. eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
  51. eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
  52. eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
  53. eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
  54. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
  55. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
  56. eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
  57. eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
  58. eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
  59. eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
  60. eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
  61. eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
  62. eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
  63. eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
  64. eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
  65. eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
  66. eval_studio_client/api/docs/V1DashboardReport.md +31 -0
  67. eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
  68. eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
  69. eval_studio_client/api/docs/V1DataFragment.md +31 -0
  70. eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
  71. eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
  72. eval_studio_client/api/docs/V1DiffItem.md +36 -0
  73. eval_studio_client/api/docs/V1EvaluationType.md +12 -0
  74. eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
  75. eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
  76. eval_studio_client/api/docs/V1HumanDecision.md +12 -0
  77. eval_studio_client/api/docs/V1Info.md +1 -0
  78. eval_studio_client/api/docs/V1Leaderboard.md +1 -0
  79. eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
  80. eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
  81. eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
  82. eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
  83. eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
  84. eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
  85. eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
  86. eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
  87. eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
  88. eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
  89. eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
  90. eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
  91. eval_studio_client/api/docs/V1Metric.md +30 -0
  92. eval_studio_client/api/docs/V1MetricAverage.md +36 -0
  93. eval_studio_client/api/docs/V1MetricMeta.md +40 -0
  94. eval_studio_client/api/docs/V1MetricScore.md +1 -1
  95. eval_studio_client/api/docs/V1MetricScores.md +1 -1
  96. eval_studio_client/api/docs/V1ModelType.md +1 -1
  97. eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
  98. eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
  99. eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
  100. eval_studio_client/api/docs/V1Operation.md +1 -0
  101. eval_studio_client/api/docs/V1OperationView.md +12 -0
  102. eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
  103. eval_studio_client/api/docs/V1Stats.md +2 -0
  104. eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
  105. eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
  106. eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
  107. eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
  108. eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
  109. eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
  110. eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
  111. eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
  112. eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
  113. eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
  114. eval_studio_client/api/models/__init__.py +62 -0
  115. eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
  116. eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
  117. eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
  118. eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
  119. eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
  120. eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
  121. eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
  122. eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
  123. eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
  124. eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
  125. eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
  126. eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
  127. eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
  128. eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
  129. eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
  130. eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
  131. eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
  132. eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
  133. eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
  134. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
  135. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
  136. eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
  137. eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
  138. eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
  139. eval_studio_client/api/models/v1_comparison_item.py +130 -0
  140. eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
  141. eval_studio_client/api/models/v1_comparison_result.py +120 -0
  142. eval_studio_client/api/models/v1_comparison_summary.py +91 -0
  143. eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
  144. eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
  145. eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
  146. eval_studio_client/api/models/v1_dashboard_report.py +109 -0
  147. eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
  148. eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
  149. eval_studio_client/api/models/v1_data_fragment.py +91 -0
  150. eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
  151. eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
  152. eval_studio_client/api/models/v1_diff_item.py +137 -0
  153. eval_studio_client/api/models/v1_evaluation_type.py +39 -0
  154. eval_studio_client/api/models/v1_flipped_metric.py +91 -0
  155. eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
  156. eval_studio_client/api/models/v1_human_decision.py +38 -0
  157. eval_studio_client/api/models/v1_info.py +4 -2
  158. eval_studio_client/api/models/v1_leaderboard.py +5 -2
  159. eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
  160. eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
  161. eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
  162. eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
  163. eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
  164. eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
  165. eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
  166. eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
  167. eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
  168. eval_studio_client/api/models/v1_list_operations_response.py +5 -3
  169. eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
  170. eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
  171. eval_studio_client/api/models/v1_metric.py +89 -0
  172. eval_studio_client/api/models/v1_metric_average.py +101 -0
  173. eval_studio_client/api/models/v1_metric_meta.py +109 -0
  174. eval_studio_client/api/models/v1_metric_score.py +6 -1
  175. eval_studio_client/api/models/v1_metric_scores.py +1 -1
  176. eval_studio_client/api/models/v1_model_type.py +2 -1
  177. eval_studio_client/api/models/v1_models_comparisons.py +93 -0
  178. eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
  179. eval_studio_client/api/models/v1_models_overview.py +97 -0
  180. eval_studio_client/api/models/v1_operation.py +6 -2
  181. eval_studio_client/api/models/v1_operation_view.py +38 -0
  182. eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
  183. eval_studio_client/api/models/v1_stats.py +16 -2
  184. eval_studio_client/api/models/v1_technical_metrics.py +96 -0
  185. eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
  186. eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
  187. eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
  188. eval_studio_client/api/models/v1_test_case_result.py +157 -0
  189. eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
  190. eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
  191. eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
  192. eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
  193. eval_studio_client/api/models/v1_workflow_type.py +1 -0
  194. eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
  195. eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
  196. eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
  197. eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
  198. eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
  199. eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
  200. eval_studio_client/api/test/test_operation_service_api.py +18 -0
  201. eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
  202. eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
  203. eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
  204. eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
  205. eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
  206. eval_studio_client/api/test/test_test_case_service_api.py +6 -0
  207. eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
  208. eval_studio_client/api/test/test_test_service_api.py +6 -0
  209. eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
  210. eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
  211. eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
  212. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
  213. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
  214. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
  215. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
  216. eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
  217. eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
  218. eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
  219. eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
  220. eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
  221. eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
  222. eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
  223. eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
  224. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
  225. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
  226. eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
  227. eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
  228. eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
  229. eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
  230. eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
  231. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
  232. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
  233. eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
  234. eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
  235. eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
  236. eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
  237. eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
  238. eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
  239. eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
  240. eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
  241. eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
  242. eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
  243. eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
  244. eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
  245. eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
  246. eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
  247. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
  248. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
  249. eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
  250. eval_studio_client/api/test/test_v1_diff_item.py +226 -0
  251. eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
  252. eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
  253. eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
  254. eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
  255. eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
  256. eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
  257. eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
  258. eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
  259. eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
  260. eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
  261. eval_studio_client/api/test/test_v1_human_decision.py +33 -0
  262. eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
  263. eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
  264. eval_studio_client/api/test/test_v1_info.py +4 -1
  265. eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
  266. eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
  267. eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
  268. eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
  269. eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
  270. eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
  271. eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
  272. eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
  273. eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
  274. eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
  275. eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
  276. eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
  277. eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
  278. eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
  279. eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
  280. eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
  281. eval_studio_client/api/test/test_v1_metric.py +52 -0
  282. eval_studio_client/api/test/test_v1_metric_average.py +58 -0
  283. eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
  284. eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
  285. eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
  286. eval_studio_client/api/test/test_v1_models_overview.py +60 -0
  287. eval_studio_client/api/test/test_v1_operation.py +2 -1
  288. eval_studio_client/api/test/test_v1_operation_view.py +33 -0
  289. eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
  290. eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
  291. eval_studio_client/api/test/test_v1_stats.py +3 -1
  292. eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
  293. eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
  294. eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
  295. eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
  296. eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
  297. eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
  298. eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
  299. eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
  300. eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
  301. eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
  302. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
  303. eval_studio_client/models.py +18 -6
  304. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/METADATA +2 -2
  305. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/RECORD +306 -111
  306. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/WHEEL +0 -0
eval_studio_client/api/models/v1_get_dashboard_report_response.py
@@ -0,0 +1,91 @@
+ # coding: utf-8
+
+ """
+ ai/h2o/eval_studio/v1/insight.proto
+
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+ The version of the OpenAPI document: version not set
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import pprint
+ import re # noqa: F401
+ import json
+
+ from pydantic import BaseModel, ConfigDict, Field
+ from typing import Any, ClassVar, Dict, List, Optional
+ from eval_studio_client.api.models.v1_dashboard_report import V1DashboardReport
+ from typing import Optional, Set
+ from typing_extensions import Self
+
+ class V1GetDashboardReportResponse(BaseModel):
+ """
+ V1GetDashboardReportResponse
+ """ # noqa: E501
+ dashboard_report: Optional[V1DashboardReport] = Field(default=None, alias="dashboardReport")
+ __properties: ClassVar[List[str]] = ["dashboardReport"]
+
+ model_config = ConfigDict(
+ populate_by_name=True,
+ validate_assignment=True,
+ protected_namespaces=(),
+ )
+
+
+ def to_str(self) -> str:
+ """Returns the string representation of the model using alias"""
+ return pprint.pformat(self.model_dump(by_alias=True))
+
+ def to_json(self) -> str:
+ """Returns the JSON representation of the model using alias"""
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+ return json.dumps(self.to_dict())
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Optional[Self]:
+ """Create an instance of V1GetDashboardReportResponse from a JSON string"""
+ return cls.from_dict(json.loads(json_str))
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return the dictionary representation of the model using alias.
+
+ This has the following differences from calling pydantic's
+ `self.model_dump(by_alias=True)`:
+
+ * `None` is only added to the output dict for nullable fields that
+ were set at model initialization. Other fields with value `None`
+ are ignored.
+ """
+ excluded_fields: Set[str] = set([
+ ])
+
+ _dict = self.model_dump(
+ by_alias=True,
+ exclude=excluded_fields,
+ exclude_none=True,
+ )
+ # override the default output from pydantic by calling `to_dict()` of dashboard_report
+ if self.dashboard_report:
+ _dict['dashboardReport'] = self.dashboard_report.to_dict()
+ return _dict
+
+ @classmethod
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+ """Create an instance of V1GetDashboardReportResponse from a dict"""
+ if obj is None:
+ return None
+
+ if not isinstance(obj, dict):
+ return cls.model_validate(obj, strict=False)
+
+ _obj = cls.model_validate({
+ "dashboardReport": V1DashboardReport.from_dict(obj["dashboardReport"]) if obj.get("dashboardReport") is not None else None
+ }, strict=False)
+ return _obj
+
+
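For orientation, a minimal usage sketch of the new V1GetDashboardReportResponse model, relying only on the helpers visible in the diff above (from_json, to_dict); the payload is invented, and it assumes V1DashboardReport tolerates an empty object since the generated models use optional fields:

    from eval_studio_client.api.models.v1_get_dashboard_report_response import V1GetDashboardReportResponse

    # Invented payload for illustration; a real response carries a populated dashboardReport.
    response = V1GetDashboardReportResponse.from_json('{"dashboardReport": {}}')
    if response is not None and response.dashboard_report is not None:
        # to_dict() re-applies the camelCase alias, so the key is "dashboardReport" again.
        print(response.to_dict())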
eval_studio_client/api/models/v1_human_decision.py
@@ -0,0 +1,38 @@
+ # coding: utf-8
+
+ """
+ ai/h2o/eval_studio/v1/insight.proto
+
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+ The version of the OpenAPI document: version not set
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import json
+ from enum import Enum
+ from typing_extensions import Self
+
+
+ class V1HumanDecision(str, Enum):
+ """
+ HumanDecision represents the human decision on the result. - HUMAN_DECISION_UNSPECIFIED: Default value, means no decision has been made. - HUMAN_DECISION_APPROVED: The result was approved by human. - HUMAN_DECISION_REJECTED: The result was rejected by human.
+ """
+
+ """
+ allowed enum values
+ """
+ HUMAN_DECISION_UNSPECIFIED = 'HUMAN_DECISION_UNSPECIFIED'
+ HUMAN_DECISION_APPROVED = 'HUMAN_DECISION_APPROVED'
+ HUMAN_DECISION_REJECTED = 'HUMAN_DECISION_REJECTED'
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Self:
+ """Create an instance of V1HumanDecision from a JSON string"""
+ return cls(json.loads(json_str))
+
+
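A short sketch of the new V1HumanDecision enum in use; the JSON literal is simply one of the allowed values listed above:

    from eval_studio_client.api.models.v1_human_decision import V1HumanDecision

    decision = V1HumanDecision.from_json('"HUMAN_DECISION_APPROVED"')
    assert decision is V1HumanDecision.HUMAN_DECISION_APPROVED
    # str-backed enum, so it also compares equal to the raw wire value.
    assert decision == "HUMAN_DECISION_APPROVED"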
eval_studio_client/api/models/v1_info.py
@@ -38,7 +38,8 @@ class V1Info(BaseModel):
  public_instance: Optional[StrictBool] = Field(default=None, description="If the Eval Studio instance is public.", alias="publicInstance")
  sharing_enabled: Optional[StrictBool] = Field(default=None, description="Whether the sharing capability is enabled.", alias="sharingEnabled")
  experimental_features_enabled: Optional[StrictBool] = Field(default=None, description="Whether the experimental features are enabled.", alias="experimentalFeaturesEnabled")
- __properties: ClassVar[List[str]] = ["baseUrl", "version", "oauth2LoginUrl", "oauth2LogoutUrl", "h2oGpteAllowlist", "h2oGpteClientVersion", "h2oSonarVersion", "preferredLlmsForTestGeneration", "h2oCloudUrl", "publicInstance", "sharingEnabled", "experimentalFeaturesEnabled"]
+ model_type_allowlist: Optional[List[StrictStr]] = Field(default=None, description="Allowlist of model types for UI that can be hosted in Eval Studio. E.g. MODEL_TYPE_H2OGPTE_RAG, MODEL_TYPE_OPENAI_CHAT, MODEL_TYPE_AMAZON_BEDROCK. Use \"*\" to allow all model types.", alias="modelTypeAllowlist")
+ __properties: ClassVar[List[str]] = ["baseUrl", "version", "oauth2LoginUrl", "oauth2LogoutUrl", "h2oGpteAllowlist", "h2oGpteClientVersion", "h2oSonarVersion", "preferredLlmsForTestGeneration", "h2oCloudUrl", "publicInstance", "sharingEnabled", "experimentalFeaturesEnabled", "modelTypeAllowlist"]
 
  model_config = ConfigDict(
  populate_by_name=True,
@@ -102,7 +103,8 @@ class V1Info(BaseModel):
  "h2oCloudUrl": obj.get("h2oCloudUrl"),
  "publicInstance": obj.get("publicInstance"),
  "sharingEnabled": obj.get("sharingEnabled"),
- "experimentalFeaturesEnabled": obj.get("experimentalFeaturesEnabled")
+ "experimentalFeaturesEnabled": obj.get("experimentalFeaturesEnabled"),
+ "modelTypeAllowlist": obj.get("modelTypeAllowlist")
  }, strict=False)
  return _obj
 
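A hedged sketch of reading the new modelTypeAllowlist field from V1Info; the payload below is assumed, not a real server response:

    from eval_studio_client.api.models.v1_info import V1Info

    # Assumed payload; only the newly added field is shown.
    info = V1Info.from_dict({"modelTypeAllowlist": ["*"]})
    if info is not None and info.model_type_allowlist is not None:
        print("*" in info.model_type_allowlist)  # True means all model types are allowed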
eval_studio_client/api/models/v1_leaderboard.py
@@ -20,6 +20,7 @@ import json
  from datetime import datetime
  from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
  from typing import Any, ClassVar, Dict, List, Optional
+ from eval_studio_client.api.models.v1_evaluation_type import V1EvaluationType
  from eval_studio_client.api.models.v1_insight import V1Insight
  from eval_studio_client.api.models.v1_leaderboard_status import V1LeaderboardStatus
  from eval_studio_client.api.models.v1_leaderboard_type import V1LeaderboardType
@@ -57,7 +58,8 @@ class V1Leaderboard(BaseModel):
  type: Optional[V1LeaderboardType] = None
  demo: Optional[StrictBool] = Field(default=None, description="Output only. Whether the Leaderboard is a demo resource or not. Demo resources are read only.")
  test_lab: Optional[StrictStr] = Field(default=None, description="Optional. Resource name of the TestLab if Leaderboard was created from a imported TestLab.", alias="testLab")
- __properties: ClassVar[List[str]] = ["name", "createTime", "creator", "updateTime", "updater", "deleteTime", "deleter", "displayName", "description", "status", "evaluator", "tests", "model", "createOperation", "leaderboardReport", "leaderboardTable", "leaderboardSummary", "llmModels", "leaderboardProblems", "evaluatorParameters", "insights", "modelParameters", "h2ogpteCollection", "type", "demo", "testLab"]
+ evaluation_type: Optional[V1EvaluationType] = Field(default=None, alias="evaluationType")
+ __properties: ClassVar[List[str]] = ["name", "createTime", "creator", "updateTime", "updater", "deleteTime", "deleter", "displayName", "description", "status", "evaluator", "tests", "model", "createOperation", "leaderboardReport", "leaderboardTable", "leaderboardSummary", "llmModels", "leaderboardProblems", "evaluatorParameters", "insights", "modelParameters", "h2ogpteCollection", "type", "demo", "testLab", "evaluationType"]
 
  model_config = ConfigDict(
  populate_by_name=True,
@@ -175,7 +177,8 @@ class V1Leaderboard(BaseModel):
  "h2ogpteCollection": obj.get("h2ogpteCollection"),
  "type": obj.get("type"),
  "demo": obj.get("demo"),
- "testLab": obj.get("testLab")
+ "testLab": obj.get("testLab"),
+ "evaluationType": obj.get("evaluationType")
  }, strict=False)
  return _obj
 
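A small sketch of the new evaluation_type field on V1Leaderboard; the display name is made up, and a real value for evaluationType would come from the new V1EvaluationType enum (documented above but not shown in this excerpt):

    from eval_studio_client.api.models.v1_leaderboard import V1Leaderboard

    # Assumed minimal payload.
    leaderboard = V1Leaderboard.from_dict({"displayName": "My leaderboard"})
    if leaderboard is not None:
        print(leaderboard.evaluation_type)  # None unless the server set evaluationType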
eval_studio_client/api/models/v1_leaderboard_cmp_report.py
@@ -0,0 +1,93 @@
+ # coding: utf-8
+
+ """
+ ai/h2o/eval_studio/v1/insight.proto
+
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+ The version of the OpenAPI document: version not set
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import pprint
+ import re # noqa: F401
+ import json
+
+ from pydantic import BaseModel, ConfigDict, Field, StrictStr
+ from typing import Any, ClassVar, Dict, List, Optional
+ from eval_studio_client.api.models.v1_comparison_result import V1ComparisonResult
+ from typing import Optional, Set
+ from typing_extensions import Self
+
+ class V1LeaderboardCmpReport(BaseModel):
+ """
+ V1LeaderboardCmpReport
+ """ # noqa: E501
+ summary: Optional[StrictStr] = Field(default=None, description="Summary of the comparison.")
+ comparison_result: Optional[V1ComparisonResult] = Field(default=None, alias="comparisonResult")
+ __properties: ClassVar[List[str]] = ["summary", "comparisonResult"]
+
+ model_config = ConfigDict(
+ populate_by_name=True,
+ validate_assignment=True,
+ protected_namespaces=(),
+ )
+
+
+ def to_str(self) -> str:
+ """Returns the string representation of the model using alias"""
+ return pprint.pformat(self.model_dump(by_alias=True))
+
+ def to_json(self) -> str:
+ """Returns the JSON representation of the model using alias"""
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+ return json.dumps(self.to_dict())
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Optional[Self]:
+ """Create an instance of V1LeaderboardCmpReport from a JSON string"""
+ return cls.from_dict(json.loads(json_str))
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return the dictionary representation of the model using alias.
+
+ This has the following differences from calling pydantic's
+ `self.model_dump(by_alias=True)`:
+
+ * `None` is only added to the output dict for nullable fields that
+ were set at model initialization. Other fields with value `None`
+ are ignored.
+ """
+ excluded_fields: Set[str] = set([
+ ])
+
+ _dict = self.model_dump(
+ by_alias=True,
+ exclude=excluded_fields,
+ exclude_none=True,
+ )
+ # override the default output from pydantic by calling `to_dict()` of comparison_result
+ if self.comparison_result:
+ _dict['comparisonResult'] = self.comparison_result.to_dict()
+ return _dict
+
+ @classmethod
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+ """Create an instance of V1LeaderboardCmpReport from a dict"""
+ if obj is None:
+ return None
+
+ if not isinstance(obj, dict):
+ return cls.model_validate(obj, strict=False)
+
+ _obj = cls.model_validate({
+ "summary": obj.get("summary"),
+ "comparisonResult": V1ComparisonResult.from_dict(obj["comparisonResult"]) if obj.get("comparisonResult") is not None else None
+ }, strict=False)
+ return _obj
+
+
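For reference, a minimal sketch of the new comparison report model; the summary text is invented, and comparisonResult is left unset because the V1ComparisonResult structure is defined elsewhere in this release:

    from eval_studio_client.api.models.v1_leaderboard_cmp_report import V1LeaderboardCmpReport

    report = V1LeaderboardCmpReport.from_dict({"summary": "Current leaderboard wins on 3 of 5 metrics."})
    if report is not None:
        print(report.summary)
        print(report.to_json())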
eval_studio_client/api/models/v1_leaderboard_comparison_item.py
@@ -0,0 +1,91 @@
+ # coding: utf-8
+
+ """
+ ai/h2o/eval_studio/v1/insight.proto
+
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+ The version of the OpenAPI document: version not set
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import pprint
+ import re # noqa: F401
+ import json
+
+ from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
+ from typing import Any, ClassVar, Dict, List, Optional
+ from typing import Optional, Set
+ from typing_extensions import Self
+
+ class V1LeaderboardComparisonItem(BaseModel):
+ """
+ V1LeaderboardComparisonItem
+ """ # noqa: E501
+ wins: Optional[StrictInt] = Field(default=None, description="Number of wins.")
+ question: Optional[StrictStr] = Field(default=None, description="Question text.")
+ changed_metrics_count: Optional[StrictInt] = Field(default=None, description="Count of changed metrics.", alias="changedMetricsCount")
+ __properties: ClassVar[List[str]] = ["wins", "question", "changedMetricsCount"]
+
+ model_config = ConfigDict(
+ populate_by_name=True,
+ validate_assignment=True,
+ protected_namespaces=(),
+ )
+
+
+ def to_str(self) -> str:
+ """Returns the string representation of the model using alias"""
+ return pprint.pformat(self.model_dump(by_alias=True))
+
+ def to_json(self) -> str:
+ """Returns the JSON representation of the model using alias"""
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+ return json.dumps(self.to_dict())
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Optional[Self]:
+ """Create an instance of V1LeaderboardComparisonItem from a JSON string"""
+ return cls.from_dict(json.loads(json_str))
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return the dictionary representation of the model using alias.
+
+ This has the following differences from calling pydantic's
+ `self.model_dump(by_alias=True)`:
+
+ * `None` is only added to the output dict for nullable fields that
+ were set at model initialization. Other fields with value `None`
+ are ignored.
+ """
+ excluded_fields: Set[str] = set([
+ ])
+
+ _dict = self.model_dump(
+ by_alias=True,
+ exclude=excluded_fields,
+ exclude_none=True,
+ )
+ return _dict
+
+ @classmethod
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+ """Create an instance of V1LeaderboardComparisonItem from a dict"""
+ if obj is None:
+ return None
+
+ if not isinstance(obj, dict):
+ return cls.model_validate(obj, strict=False)
+
+ _obj = cls.model_validate({
+ "wins": obj.get("wins"),
+ "question": obj.get("question"),
+ "changedMetricsCount": obj.get("changedMetricsCount")
+ }, strict=False)
+ return _obj
+
+
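A quick sketch of the new comparison item model; all values are invented:

    from eval_studio_client.api.models.v1_leaderboard_comparison_item import V1LeaderboardComparisonItem

    item = V1LeaderboardComparisonItem(wins=2, question="What is the refund policy?", changed_metrics_count=1)
    # to_dict() emits the camelCase alias "changedMetricsCount".
    print(item.to_dict())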
eval_studio_client/api/models/v1_leaderboard_info.py
@@ -0,0 +1,97 @@
+ # coding: utf-8
+
+ """
+ ai/h2o/eval_studio/v1/insight.proto
+
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+ The version of the OpenAPI document: version not set
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import pprint
+ import re # noqa: F401
+ import json
+
+ from pydantic import BaseModel, ConfigDict, Field, StrictStr
+ from typing import Any, ClassVar, Dict, List, Optional
+ from eval_studio_client.api.models.v1_leaderboard_comparison_item import V1LeaderboardComparisonItem
+ from typing import Optional, Set
+ from typing_extensions import Self
+
+ class V1LeaderboardInfo(BaseModel):
+ """
+ V1LeaderboardInfo
+ """ # noqa: E501
+ key: Optional[StrictStr] = Field(default=None, description="Unique key identifying the leaderboard pair (format: \"baseline_id|current_id\").")
+ items: Optional[List[V1LeaderboardComparisonItem]] = Field(default=None, description="List of leaderboard comparison items.")
+ __properties: ClassVar[List[str]] = ["key", "items"]
+
+ model_config = ConfigDict(
+ populate_by_name=True,
+ validate_assignment=True,
+ protected_namespaces=(),
+ )
+
+
+ def to_str(self) -> str:
+ """Returns the string representation of the model using alias"""
+ return pprint.pformat(self.model_dump(by_alias=True))
+
+ def to_json(self) -> str:
+ """Returns the JSON representation of the model using alias"""
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+ return json.dumps(self.to_dict())
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Optional[Self]:
+ """Create an instance of V1LeaderboardInfo from a JSON string"""
+ return cls.from_dict(json.loads(json_str))
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Return the dictionary representation of the model using alias.
+
+ This has the following differences from calling pydantic's
+ `self.model_dump(by_alias=True)`:
+
+ * `None` is only added to the output dict for nullable fields that
+ were set at model initialization. Other fields with value `None`
+ are ignored.
+ """
+ excluded_fields: Set[str] = set([
+ ])
+
+ _dict = self.model_dump(
+ by_alias=True,
+ exclude=excluded_fields,
+ exclude_none=True,
+ )
+ # override the default output from pydantic by calling `to_dict()` of each item in items (list)
+ _items = []
+ if self.items:
+ for _item_items in self.items:
+ if _item_items:
+ _items.append(_item_items.to_dict())
+ _dict['items'] = _items
+ return _dict
+
+ @classmethod
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+ """Create an instance of V1LeaderboardInfo from a dict"""
+ if obj is None:
+ return None
+
+ if not isinstance(obj, dict):
+ return cls.model_validate(obj, strict=False)
+
+ _obj = cls.model_validate({
+ "key": obj.get("key"),
+ "items": [V1LeaderboardComparisonItem.from_dict(_item) for _item in obj["items"]] if obj.get("items") is not None else None
+ }, strict=False)
+ return _obj
+
+
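A short sketch of the new V1LeaderboardInfo model with one nested comparison item; the key follows the documented "baseline_id|current_id" format, but the identifiers and values are invented:

    from eval_studio_client.api.models.v1_leaderboard_info import V1LeaderboardInfo

    info = V1LeaderboardInfo.from_dict({
        "key": "leaderboards/123|leaderboards/456",
        "items": [{"wins": 1, "question": "Example question", "changedMetricsCount": 2}],
    })
    if info is not None and info.items:
        print(info.items[0].wins, info.items[0].changed_metrics_count)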
eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py
@@ -19,17 +19,23 @@ import json
 
  from pydantic import BaseModel, ConfigDict, Field, StrictStr
  from typing import Any, ClassVar, Dict, List, Optional
+ from eval_studio_client.api.models.v1_agent_chat_activity_diagram import V1AgentChatActivityDiagram
+ from eval_studio_client.api.models.v1_agent_chat_scripts_bar_chart import V1AgentChatScriptsBarChart
+ from eval_studio_client.api.models.v1_agent_chat_tools_bar_chart import V1AgentChatToolsBarChart
  from eval_studio_client.api.models.v1_leaderboard_report_actual_output_data import V1LeaderboardReportActualOutputData
  from typing import Optional, Set
  from typing_extensions import Self
 
  class V1LeaderboardReportActualOutputMeta(BaseModel):
  """
- ActualOutputMeta represents the metadata about the actual output.
+ ActualOutputMeta represents the metadata about the actual output. Each instance can contain any combination of the fields below.
  """ # noqa: E501
- tokenization: Optional[StrictStr] = Field(default=None, description="Output only. Actual output data tokenization like sentence_level_punkt.")
- data: Optional[List[V1LeaderboardReportActualOutputData]] = Field(default=None, description="Output only. Actual output data - list of text fragments coupled with the metric values.")
- __properties: ClassVar[List[str]] = ["tokenization", "data"]
+ tokenization: Optional[StrictStr] = Field(default=None, description="Optional. Actual output data tokenization like sentence_level_punkt.")
+ data: Optional[List[V1LeaderboardReportActualOutputData]] = Field(default=None, description="Optional. Actual output data - list of text fragments coupled with the metric values.")
+ agent_chat_activity_diagram: Optional[V1AgentChatActivityDiagram] = Field(default=None, alias="agentChatActivityDiagram")
+ agent_chat_tools_bar_chart: Optional[V1AgentChatToolsBarChart] = Field(default=None, alias="agentChatToolsBarChart")
+ agent_chat_scripts_bar_chart: Optional[V1AgentChatScriptsBarChart] = Field(default=None, alias="agentChatScriptsBarChart")
+ __properties: ClassVar[List[str]] = ["tokenization", "data", "agentChatActivityDiagram", "agentChatToolsBarChart", "agentChatScriptsBarChart"]
 
  model_config = ConfigDict(
  populate_by_name=True,
@@ -61,12 +67,8 @@ class V1LeaderboardReportActualOutputMeta(BaseModel):
  * `None` is only added to the output dict for nullable fields that
  were set at model initialization. Other fields with value `None`
  are ignored.
- * OpenAPI `readOnly` fields are excluded.
- * OpenAPI `readOnly` fields are excluded.
  """
  excluded_fields: Set[str] = set([
- "tokenization",
- "data",
  ])
 
  _dict = self.model_dump(
@@ -81,6 +83,15 @@ class V1LeaderboardReportActualOutputMeta(BaseModel):
  if _item_data:
  _items.append(_item_data.to_dict())
  _dict['data'] = _items
+ # override the default output from pydantic by calling `to_dict()` of agent_chat_activity_diagram
+ if self.agent_chat_activity_diagram:
+ _dict['agentChatActivityDiagram'] = self.agent_chat_activity_diagram.to_dict()
+ # override the default output from pydantic by calling `to_dict()` of agent_chat_tools_bar_chart
+ if self.agent_chat_tools_bar_chart:
+ _dict['agentChatToolsBarChart'] = self.agent_chat_tools_bar_chart.to_dict()
+ # override the default output from pydantic by calling `to_dict()` of agent_chat_scripts_bar_chart
+ if self.agent_chat_scripts_bar_chart:
+ _dict['agentChatScriptsBarChart'] = self.agent_chat_scripts_bar_chart.to_dict()
  return _dict
 
  @classmethod
@@ -94,7 +105,10 @@ class V1LeaderboardReportActualOutputMeta(BaseModel):
 
  _obj = cls.model_validate({
  "tokenization": obj.get("tokenization"),
- "data": [V1LeaderboardReportActualOutputData.from_dict(_item) for _item in obj["data"]] if obj.get("data") is not None else None
+ "data": [V1LeaderboardReportActualOutputData.from_dict(_item) for _item in obj["data"]] if obj.get("data") is not None else None,
+ "agentChatActivityDiagram": V1AgentChatActivityDiagram.from_dict(obj["agentChatActivityDiagram"]) if obj.get("agentChatActivityDiagram") is not None else None,
+ "agentChatToolsBarChart": V1AgentChatToolsBarChart.from_dict(obj["agentChatToolsBarChart"]) if obj.get("agentChatToolsBarChart") is not None else None,
+ "agentChatScriptsBarChart": V1AgentChatScriptsBarChart.from_dict(obj["agentChatScriptsBarChart"]) if obj.get("agentChatScriptsBarChart") is not None else None
  }, strict=False)
  return _obj
 
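A hedged sketch of the extended actual-output metadata; only tokenization is set in this assumed payload, so the new agent-chat chart fields remain None:

    from eval_studio_client.api.models.v1_leaderboard_report_actual_output_meta import V1LeaderboardReportActualOutputMeta

    meta = V1LeaderboardReportActualOutputMeta.from_dict({"tokenization": "sentence_level_punkt"})
    if meta is not None:
        print(meta.tokenization)
        print(meta.agent_chat_activity_diagram)  # None in this sketch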
eval_studio_client/api/models/v1_leaderboard_report_result.py
@@ -19,6 +19,7 @@ import json
 
  from pydantic import BaseModel, ConfigDict, Field, StrictFloat, StrictInt, StrictStr
  from typing import Any, ClassVar, Dict, List, Optional, Union
+ from eval_studio_client.api.models.v1_human_decision import V1HumanDecision
  from eval_studio_client.api.models.v1_leaderboard_report_actual_output_meta import V1LeaderboardReportActualOutputMeta
  from eval_studio_client.api.models.v1_leaderboard_report_result_relationship import V1LeaderboardReportResultRelationship
  from eval_studio_client.api.models.v1_metric_score import V1MetricScore
@@ -31,22 +32,25 @@ class V1LeaderboardReportResult(BaseModel):
  """ # noqa: E501
  key: Optional[StrictStr] = Field(default=None, description="Output only. Composite unique key of the result formed by the model key and test case key.")
  input: Optional[StrictStr] = Field(default=None, description="Output only. Input prompt or text to be processed.")
- corpus: Optional[List[StrictStr]] = Field(default=None, description="Output only. Collection of corpus documents to be used during evaluation.")
- context: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of contextual information or references.")
- categories: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of categories or labels for classification.")
- relationships: Optional[List[V1LeaderboardReportResultRelationship]] = Field(default=None, description="Output only. List of relationships or associations between entities.")
+ corpus: Optional[List[StrictStr]] = Field(default=None, description="Output only. Collection of corpus documents to be used during evaluation. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
+ context: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of contextual information or references. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
+ categories: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of categories or labels for classification. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
+ relationships: Optional[List[V1LeaderboardReportResultRelationship]] = Field(default=None, description="Output only. List of relationships or associations between entities. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
  expected_output: Optional[StrictStr] = Field(default=None, description="Output only. Expected output or target result.", alias="expectedOutput")
- output_constraints: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of constraints that should be applied to the output.", alias="outputConstraints")
- output_condition: Optional[StrictStr] = Field(default=None, description="Output only. Condition that output should satisfy.", alias="outputCondition")
+ output_constraints: Optional[List[StrictStr]] = Field(default=None, description="Output only. List of constraints that should be applied to the output. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.", alias="outputConstraints")
+ output_condition: Optional[StrictStr] = Field(default=None, description="Output only. Condition that output should satisfy. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.", alias="outputCondition")
  actual_output: Optional[StrictStr] = Field(default=None, description="Output only. Actual output produced by the model.", alias="actualOutput")
- actual_duration: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Duration of processing in seconds.", alias="actualDuration")
- cost: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Cost of processing in currency units.")
+ actual_duration: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Duration of processing in seconds. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.", alias="actualDuration")
+ cost: Optional[Union[StrictFloat, StrictInt]] = Field(default=None, description="Output only. Cost of processing in currency units. Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used.")
  model_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the model used.", alias="modelKey")
  test_case_key: Optional[StrictStr] = Field(default=None, description="Output only. Unique identifier for the test case.", alias="testCaseKey")
  metrics: Optional[List[V1MetricScore]] = Field(default=None, description="Optional. All metrics values for the result.")
  result_error_message: Optional[StrictStr] = Field(default=None, description="Output only. Error message if processing resulted in failure.", alias="resultErrorMessage")
  actual_output_meta: Optional[List[V1LeaderboardReportActualOutputMeta]] = Field(default=None, description="Output only. Additional metadata about the actual output.", alias="actualOutputMeta")
- __properties: ClassVar[List[str]] = ["key", "input", "corpus", "context", "categories", "relationships", "expectedOutput", "outputConstraints", "outputCondition", "actualOutput", "actualDuration", "cost", "modelKey", "testCaseKey", "metrics", "resultErrorMessage", "actualOutputMeta"]
+ human_decision: Optional[V1HumanDecision] = Field(default=None, alias="humanDecision")
+ comment: Optional[StrictStr] = Field(default=None, description="Output only. Optional comment about the result.")
+ annotations: Optional[Dict[str, Dict[str, Any]]] = Field(default=None, description="Output only. Annotations associated with the test case result.")
+ __properties: ClassVar[List[str]] = ["key", "input", "corpus", "context", "categories", "relationships", "expectedOutput", "outputConstraints", "outputCondition", "actualOutput", "actualDuration", "cost", "modelKey", "testCaseKey", "metrics", "resultErrorMessage", "actualOutputMeta", "humanDecision", "comment", "annotations"]
 
  model_config = ConfigDict(
  populate_by_name=True,
@@ -94,6 +98,8 @@ class V1LeaderboardReportResult(BaseModel):
  * OpenAPI `readOnly` fields are excluded.
  * OpenAPI `readOnly` fields are excluded.
  * OpenAPI `readOnly` fields are excluded.
+ * OpenAPI `readOnly` fields are excluded.
+ * OpenAPI `readOnly` fields are excluded.
  """
  excluded_fields: Set[str] = set([
  "key",
@@ -112,6 +118,8 @@ class V1LeaderboardReportResult(BaseModel):
  "test_case_key",
  "result_error_message",
  "actual_output_meta",
+ "comment",
+ "annotations",
  ])
 
  _dict = self.model_dump(
@@ -168,7 +176,10 @@ class V1LeaderboardReportResult(BaseModel):
  "testCaseKey": obj.get("testCaseKey"),
  "metrics": [V1MetricScore.from_dict(_item) for _item in obj["metrics"]] if obj.get("metrics") is not None else None,
  "resultErrorMessage": obj.get("resultErrorMessage"),
- "actualOutputMeta": [V1LeaderboardReportActualOutputMeta.from_dict(_item) for _item in obj["actualOutputMeta"]] if obj.get("actualOutputMeta") is not None else None
+ "actualOutputMeta": [V1LeaderboardReportActualOutputMeta.from_dict(_item) for _item in obj["actualOutputMeta"]] if obj.get("actualOutputMeta") is not None else None,
+ "humanDecision": obj.get("humanDecision"),
+ "comment": obj.get("comment"),
+ "annotations": obj.get("annotations")
  }, strict=False)
  return _obj
 
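A minimal sketch of the annotation-related fields added to V1LeaderboardReportResult; the payload is invented, and the annotation keys are placeholders rather than a documented schema:

    from eval_studio_client.api.models.v1_leaderboard_report_result import V1LeaderboardReportResult

    result = V1LeaderboardReportResult.from_dict({
        "humanDecision": "HUMAN_DECISION_APPROVED",
        "comment": "Looks correct.",
        "annotations": {"reviewer": {"source": "ui"}},
    })
    if result is not None:
        print(result.human_decision, result.comment)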
eval_studio_client/api/models/v1_leaderboard_report_result_view.py
@@ -0,0 +1,38 @@
+ # coding: utf-8
+
+ """
+ ai/h2o/eval_studio/v1/insight.proto
+
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+ The version of the OpenAPI document: version not set
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+ Do not edit the class manually.
+ """ # noqa: E501
+
+
+ from __future__ import annotations
+ import json
+ from enum import Enum
+ from typing_extensions import Self
+
+
+ class V1LeaderboardReportResultView(str, Enum):
+ """
+ - LEADERBOARD_REPORT_RESULT_VIEW_UNSPECIFIED: Default value. The basic view with essential fields. - LEADERBOARD_REPORT_RESULT_VIEW_FULL: Full view with all fields populated. - LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY: Summary view with only key fields.
+ """
+
+ """
+ allowed enum values
+ """
+ LEADERBOARD_REPORT_RESULT_VIEW_UNSPECIFIED = 'LEADERBOARD_REPORT_RESULT_VIEW_UNSPECIFIED'
+ LEADERBOARD_REPORT_RESULT_VIEW_FULL = 'LEADERBOARD_REPORT_RESULT_VIEW_FULL'
+ LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY = 'LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY'
+
+ @classmethod
+ def from_json(cls, json_str: str) -> Self:
+ """Create an instance of V1LeaderboardReportResultView from a JSON string"""
+ return cls(json.loads(json_str))
+
+
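Finally, a small sketch of the new report-result view enum; how the value is passed to the leaderboard report service calls is not shown in this excerpt, so only the enum itself is exercised:

    from eval_studio_client.api.models.v1_leaderboard_report_result_view import V1LeaderboardReportResultView

    view = V1LeaderboardReportResultView.LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY
    # The SUMMARY view omits the bulky fields flagged "Omitted if LEADERBOARD_REPORT_RESULT_VIEW_SUMMARY is used" above.
    print(view.value)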