eval_studio_client-1.2.5-py3-none-any.whl → eval_studio_client-1.3.0a1-py3-none-any.whl

This diff compares the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their respective public registries.
Files changed (306)
  1. eval_studio_client/api/__init__.py +65 -0
  2. eval_studio_client/api/api/__init__.py +3 -0
  3. eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
  4. eval_studio_client/api/api/dashboard_service_api.py +16 -16
  5. eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
  6. eval_studio_client/api/api/document_service_api.py +16 -16
  7. eval_studio_client/api/api/evaluation_service_api.py +12 -12
  8. eval_studio_client/api/api/evaluator_service_api.py +16 -16
  9. eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
  10. eval_studio_client/api/api/leaderboard_service_api.py +554 -16
  11. eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
  12. eval_studio_client/api/api/model_service_api.py +16 -16
  13. eval_studio_client/api/api/operation_service_api.py +821 -17
  14. eval_studio_client/api/api/perturbator_service_api.py +22 -22
  15. eval_studio_client/api/api/test_case_service_api.py +300 -16
  16. eval_studio_client/api/api/test_class_service_api.py +16 -16
  17. eval_studio_client/api/api/test_service_api.py +285 -16
  18. eval_studio_client/api/api/workflow_node_service_api.py +16 -16
  19. eval_studio_client/api/api/workflow_service_api.py +16 -16
  20. eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
  21. eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
  22. eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
  23. eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
  24. eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
  25. eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
  26. eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
  27. eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
  28. eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
  29. eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
  30. eval_studio_client/api/docs/ModelServiceApi.md +5 -5
  31. eval_studio_client/api/docs/OperationServiceApi.md +215 -8
  32. eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
  33. eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
  34. eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
  35. eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
  36. eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
  37. eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
  38. eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
  39. eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
  40. eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
  41. eval_studio_client/api/docs/TestServiceApi.md +73 -5
  42. eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
  43. eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
  44. eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
  45. eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
  46. eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
  47. eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
  48. eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
  49. eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
  50. eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
  51. eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
  52. eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
  53. eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
  54. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
  55. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
  56. eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
  57. eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
  58. eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
  59. eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
  60. eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
  61. eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
  62. eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
  63. eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
  64. eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
  65. eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
  66. eval_studio_client/api/docs/V1DashboardReport.md +31 -0
  67. eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
  68. eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
  69. eval_studio_client/api/docs/V1DataFragment.md +31 -0
  70. eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
  71. eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
  72. eval_studio_client/api/docs/V1DiffItem.md +36 -0
  73. eval_studio_client/api/docs/V1EvaluationType.md +12 -0
  74. eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
  75. eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
  76. eval_studio_client/api/docs/V1HumanDecision.md +12 -0
  77. eval_studio_client/api/docs/V1Info.md +1 -0
  78. eval_studio_client/api/docs/V1Leaderboard.md +1 -0
  79. eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
  80. eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
  81. eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
  82. eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
  83. eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
  84. eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
  85. eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
  86. eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
  87. eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
  88. eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
  89. eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
  90. eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
  91. eval_studio_client/api/docs/V1Metric.md +30 -0
  92. eval_studio_client/api/docs/V1MetricAverage.md +36 -0
  93. eval_studio_client/api/docs/V1MetricMeta.md +40 -0
  94. eval_studio_client/api/docs/V1MetricScore.md +1 -1
  95. eval_studio_client/api/docs/V1MetricScores.md +1 -1
  96. eval_studio_client/api/docs/V1ModelType.md +1 -1
  97. eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
  98. eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
  99. eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
  100. eval_studio_client/api/docs/V1Operation.md +1 -0
  101. eval_studio_client/api/docs/V1OperationView.md +12 -0
  102. eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
  103. eval_studio_client/api/docs/V1Stats.md +2 -0
  104. eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
  105. eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
  106. eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
  107. eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
  108. eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
  109. eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
  110. eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
  111. eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
  112. eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
  113. eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
  114. eval_studio_client/api/models/__init__.py +62 -0
  115. eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
  116. eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
  117. eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
  118. eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
  119. eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
  120. eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
  121. eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
  122. eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
  123. eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
  124. eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
  125. eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
  126. eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
  127. eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
  128. eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
  129. eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
  130. eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
  131. eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
  132. eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
  133. eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
  134. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
  135. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
  136. eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
  137. eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
  138. eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
  139. eval_studio_client/api/models/v1_comparison_item.py +130 -0
  140. eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
  141. eval_studio_client/api/models/v1_comparison_result.py +120 -0
  142. eval_studio_client/api/models/v1_comparison_summary.py +91 -0
  143. eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
  144. eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
  145. eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
  146. eval_studio_client/api/models/v1_dashboard_report.py +109 -0
  147. eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
  148. eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
  149. eval_studio_client/api/models/v1_data_fragment.py +91 -0
  150. eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
  151. eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
  152. eval_studio_client/api/models/v1_diff_item.py +137 -0
  153. eval_studio_client/api/models/v1_evaluation_type.py +39 -0
  154. eval_studio_client/api/models/v1_flipped_metric.py +91 -0
  155. eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
  156. eval_studio_client/api/models/v1_human_decision.py +38 -0
  157. eval_studio_client/api/models/v1_info.py +4 -2
  158. eval_studio_client/api/models/v1_leaderboard.py +5 -2
  159. eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
  160. eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
  161. eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
  162. eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
  163. eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
  164. eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
  165. eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
  166. eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
  167. eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
  168. eval_studio_client/api/models/v1_list_operations_response.py +5 -3
  169. eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
  170. eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
  171. eval_studio_client/api/models/v1_metric.py +89 -0
  172. eval_studio_client/api/models/v1_metric_average.py +101 -0
  173. eval_studio_client/api/models/v1_metric_meta.py +109 -0
  174. eval_studio_client/api/models/v1_metric_score.py +6 -1
  175. eval_studio_client/api/models/v1_metric_scores.py +1 -1
  176. eval_studio_client/api/models/v1_model_type.py +2 -1
  177. eval_studio_client/api/models/v1_models_comparisons.py +93 -0
  178. eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
  179. eval_studio_client/api/models/v1_models_overview.py +97 -0
  180. eval_studio_client/api/models/v1_operation.py +6 -2
  181. eval_studio_client/api/models/v1_operation_view.py +38 -0
  182. eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
  183. eval_studio_client/api/models/v1_stats.py +16 -2
  184. eval_studio_client/api/models/v1_technical_metrics.py +96 -0
  185. eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
  186. eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
  187. eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
  188. eval_studio_client/api/models/v1_test_case_result.py +157 -0
  189. eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
  190. eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
  191. eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
  192. eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
  193. eval_studio_client/api/models/v1_workflow_type.py +1 -0
  194. eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
  195. eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
  196. eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
  197. eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
  198. eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
  199. eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
  200. eval_studio_client/api/test/test_operation_service_api.py +18 -0
  201. eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
  202. eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
  203. eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
  204. eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
  205. eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
  206. eval_studio_client/api/test/test_test_case_service_api.py +6 -0
  207. eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
  208. eval_studio_client/api/test/test_test_service_api.py +6 -0
  209. eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
  210. eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
  211. eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
  212. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
  213. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
  214. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
  215. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
  216. eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
  217. eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
  218. eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
  219. eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
  220. eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
  221. eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
  222. eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
  223. eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
  224. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
  225. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
  226. eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
  227. eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
  228. eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
  229. eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
  230. eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
  231. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
  232. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
  233. eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
  234. eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
  235. eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
  236. eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
  237. eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
  238. eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
  239. eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
  240. eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
  241. eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
  242. eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
  243. eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
  244. eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
  245. eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
  246. eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
  247. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
  248. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
  249. eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
  250. eval_studio_client/api/test/test_v1_diff_item.py +226 -0
  251. eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
  252. eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
  253. eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
  254. eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
  255. eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
  256. eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
  257. eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
  258. eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
  259. eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
  260. eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
  261. eval_studio_client/api/test/test_v1_human_decision.py +33 -0
  262. eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
  263. eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
  264. eval_studio_client/api/test/test_v1_info.py +4 -1
  265. eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
  266. eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
  267. eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
  268. eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
  269. eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
  270. eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
  271. eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
  272. eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
  273. eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
  274. eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
  275. eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
  276. eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
  277. eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
  278. eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
  279. eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
  280. eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
  281. eval_studio_client/api/test/test_v1_metric.py +52 -0
  282. eval_studio_client/api/test/test_v1_metric_average.py +58 -0
  283. eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
  284. eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
  285. eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
  286. eval_studio_client/api/test/test_v1_models_overview.py +60 -0
  287. eval_studio_client/api/test/test_v1_operation.py +2 -1
  288. eval_studio_client/api/test/test_v1_operation_view.py +33 -0
  289. eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
  290. eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
  291. eval_studio_client/api/test/test_v1_stats.py +3 -1
  292. eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
  293. eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
  294. eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
  295. eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
  296. eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
  297. eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
  298. eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
  299. eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
  300. eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
  301. eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
  302. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
  303. eval_studio_client/models.py +18 -6
  304. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/METADATA +2 -2
  305. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/RECORD +306 -111
  306. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/WHEEL +0 -0
eval_studio_client/api/test/test_v1_dashboard_report_result.py
@@ -0,0 +1,72 @@
+ # coding: utf-8
+
+ """
+     ai/h2o/eval_studio/v1/insight.proto
+
+     No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+     The version of the OpenAPI document: version not set
+     Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+     Do not edit the class manually.
+ """ # noqa: E501
+
+
+ import unittest
+
+ from eval_studio_client.api.models.v1_dashboard_report_result import V1DashboardReportResult
+
+ class TestV1DashboardReportResult(unittest.TestCase):
+     """V1DashboardReportResult unit test stubs"""
+
+     def setUp(self):
+         pass
+
+     def tearDown(self):
+         pass
+
+     def make_instance(self, include_optional) -> V1DashboardReportResult:
+         """Test V1DashboardReportResult
+             include_option is a boolean, when False only required
+             params are included, when True both required and
+             optional params are included """
+         # uncomment below to create an instance of `V1DashboardReportResult`
+         """
+         model = V1DashboardReportResult()
+         if include_optional:
+             return V1DashboardReportResult(
+                 key = '',
+                 input = '',
+                 expected_output = '',
+                 actual_output = '',
+                 model_key = '',
+                 test_case_key = '',
+                 metrics = {
+                     'key' : eval_studio_client.api.models.v1_metric_scores.v1MetricScores(
+                         scores = [
+                             eval_studio_client.api.models.v1_metric_score.v1MetricScore(
+                                 key = '',
+                                 value = 1.337, )
+                         ], )
+                 },
+                 result_error_map = {
+                     'key' : ''
+                 },
+                 human_decision = 'HUMAN_DECISION_UNSPECIFIED',
+                 comment = '',
+                 annotations = {
+                     'key' : None
+                 }
+             )
+         else:
+             return V1DashboardReportResult(
+         )
+         """
+
+     def testV1DashboardReportResult(self):
+         """Test V1DashboardReportResult"""
+         # inst_req_only = self.make_instance(include_optional=False)
+         # inst_req_and_optional = self.make_instance(include_optional=True)
+
+ if __name__ == '__main__':
+     unittest.main()
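For orientation, a minimal sketch of constructing the new V1DashboardReportResult by hand. The field names and the HUMAN_DECISION_UNSPECIFIED enum value come from the generated stub above; the sample values and the to_dict() helper are assumptions based on standard OpenAPI Generator Python output, not verified against 1.3.0a1.

from eval_studio_client.api.models.v1_dashboard_report_result import V1DashboardReportResult

# Field names are taken from the stub above; the values are illustrative only.
result = V1DashboardReportResult(
    key="result-1",
    input="What is Eval Studio?",
    expected_output="An evaluation tool for LLM apps.",
    actual_output="Eval Studio evaluates LLM applications.",
    human_decision="HUMAN_DECISION_UNSPECIFIED",  # enum value shown in the stub
    comment="reviewed manually",
)
print(result.to_dict())  # to_dict()/from_dict() assumed from generator conventions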
eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py
@@ -0,0 +1,58 @@
+ # coding: utf-8
+
+ """
+     ai/h2o/eval_studio/v1/insight.proto
+
+     No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+     The version of the OpenAPI document: version not set
+     Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+     Do not edit the class manually.
+ """ # noqa: E501
+
+
+ import unittest
+
+ from eval_studio_client.api.models.v1_dashboard_test_case_annotation import V1DashboardTestCaseAnnotation
+
+ class TestV1DashboardTestCaseAnnotation(unittest.TestCase):
+     """V1DashboardTestCaseAnnotation unit test stubs"""
+
+     def setUp(self):
+         pass
+
+     def tearDown(self):
+         pass
+
+     def make_instance(self, include_optional) -> V1DashboardTestCaseAnnotation:
+         """Test V1DashboardTestCaseAnnotation
+             include_option is a boolean, when False only required
+             params are included, when True both required and
+             optional params are included """
+         # uncomment below to create an instance of `V1DashboardTestCaseAnnotation`
+         """
+         model = V1DashboardTestCaseAnnotation()
+         if include_optional:
+             return V1DashboardTestCaseAnnotation(
+                 name = '',
+                 create_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                 creator = '',
+                 update_time = datetime.datetime.strptime('2013-10-20 19:20:30.00', '%Y-%m-%d %H:%M:%S.%f'),
+                 updater = '',
+                 parent = '',
+                 key = '',
+                 value = None
+             )
+         else:
+             return V1DashboardTestCaseAnnotation(
+         )
+         """
+
+     def testV1DashboardTestCaseAnnotation(self):
+         """Test V1DashboardTestCaseAnnotation"""
+         # inst_req_only = self.make_instance(include_optional=False)
+         # inst_req_and_optional = self.make_instance(include_optional=True)
+
+ if __name__ == '__main__':
+     unittest.main()
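A similar hedged sketch for the new dashboard test-case annotation model; the field names mirror the stub above, while the resource-name strings are purely hypothetical placeholders.

import datetime

from eval_studio_client.api.models.v1_dashboard_test_case_annotation import V1DashboardTestCaseAnnotation

annotation = V1DashboardTestCaseAnnotation(
    name="dashboards/d1/testCaseAnnotations/a1",  # hypothetical resource name
    parent="dashboards/d1",                       # hypothetical parent resource
    key="reviewed",
    value=True,  # the stub types this loosely (value = None), so any JSON-compatible value
    create_time=datetime.datetime(2013, 10, 20, 19, 20, 30),
)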
eval_studio_client/api/test/test_v1_data_fragment.py
@@ -0,0 +1,57 @@
+ # coding: utf-8
+
+ """
+     ai/h2o/eval_studio/v1/insight.proto
+
+     No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+     The version of the OpenAPI document: version not set
+     Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+     Do not edit the class manually.
+ """ # noqa: E501
+
+
+ import unittest
+
+ from eval_studio_client.api.models.v1_data_fragment import V1DataFragment
+
+ class TestV1DataFragment(unittest.TestCase):
+     """V1DataFragment unit test stubs"""
+
+     def setUp(self):
+         pass
+
+     def tearDown(self):
+         pass
+
+     def make_instance(self, include_optional) -> V1DataFragment:
+         """Test V1DataFragment
+             include_option is a boolean, when False only required
+             params are included, when True both required and
+             optional params are included """
+         # uncomment below to create an instance of `V1DataFragment`
+         """
+         model = V1DataFragment()
+         if include_optional:
+             return V1DataFragment(
+                 text = '',
+                 metrics = {
+                     'key' : 1.337
+                 },
+                 meta = {
+                     'key' : ''
+                 }
+             )
+         else:
+             return V1DataFragment(
+         )
+         """
+
+     def testV1DataFragment(self):
+         """Test V1DataFragment"""
+         # inst_req_only = self.make_instance(include_optional=False)
+         # inst_req_and_optional = self.make_instance(include_optional=True)
+
+ if __name__ == '__main__':
+     unittest.main()
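V1DataFragment appears to pair a text span with per-fragment metric and metadata maps. A minimal sketch with made-up values; the str-to-float and str-to-str map shapes follow the stub above.

from eval_studio_client.api.models.v1_data_fragment import V1DataFragment

fragment = V1DataFragment(
    text="The answer cites the retrieved context.",
    metrics={"similarity": 0.92},  # str -> float map, as in the stub
    meta={"source": "chunk-3"},    # str -> str map, as in the stub
)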
eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py
@@ -0,0 +1,55 @@
+ # coding: utf-8
+
+ """
+     ai/h2o/eval_studio/v1/insight.proto
+
+     No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+     The version of the OpenAPI document: version not set
+     Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+     Do not edit the class manually.
+ """ # noqa: E501
+
+
+ import unittest
+
+ from eval_studio_client.api.models.v1_deep_compare_leaderboards_request import V1DeepCompareLeaderboardsRequest
+
+ class TestV1DeepCompareLeaderboardsRequest(unittest.TestCase):
+     """V1DeepCompareLeaderboardsRequest unit test stubs"""
+
+     def setUp(self):
+         pass
+
+     def tearDown(self):
+         pass
+
+     def make_instance(self, include_optional) -> V1DeepCompareLeaderboardsRequest:
+         """Test V1DeepCompareLeaderboardsRequest
+             include_option is a boolean, when False only required
+             params are included, when True both required and
+             optional params are included """
+         # uncomment below to create an instance of `V1DeepCompareLeaderboardsRequest`
+         """
+         model = V1DeepCompareLeaderboardsRequest()
+         if include_optional:
+             return V1DeepCompareLeaderboardsRequest(
+                 leaderboard_baseline_name = '',
+                 leaderboard_current_name = '',
+                 text_similarity_metric = 'TEXT_SIMILARITY_METRIC_UNSPECIFIED',
+                 llm_model_baseline_name = '',
+                 llm_model_current_name = ''
+             )
+         else:
+             return V1DeepCompareLeaderboardsRequest(
+         )
+         """
+
+     def testV1DeepCompareLeaderboardsRequest(self):
+         """Test V1DeepCompareLeaderboardsRequest"""
+         # inst_req_only = self.make_instance(include_optional=False)
+         # inst_req_and_optional = self.make_instance(include_optional=True)
+
+ if __name__ == '__main__':
+     unittest.main()
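The request model for the new deep-compare feature takes a baseline and a current leaderboard plus a text-similarity metric. A minimal sketch; the leaderboard resource names are placeholders, and TEXT_SIMILARITY_METRIC_UNSPECIFIED is the enum value shown in the stub (the new V1TextSimilarityMetric enum presumably lists the concrete choices).

from eval_studio_client.api.models.v1_deep_compare_leaderboards_request import V1DeepCompareLeaderboardsRequest

request = V1DeepCompareLeaderboardsRequest(
    leaderboard_baseline_name="leaderboards/baseline",   # placeholder resource name
    leaderboard_current_name="leaderboards/candidate",   # placeholder resource name
    text_similarity_metric="TEXT_SIMILARITY_METRIC_UNSPECIFIED",
)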
eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py
@@ -0,0 +1,255 @@
+ # coding: utf-8
+
+ """
+     ai/h2o/eval_studio/v1/insight.proto
+
+     No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+     The version of the OpenAPI document: version not set
+     Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+     Do not edit the class manually.
+ """ # noqa: E501
+
+
+ import unittest
+
+ from eval_studio_client.api.models.v1_deep_compare_leaderboards_response import V1DeepCompareLeaderboardsResponse
+
+ class TestV1DeepCompareLeaderboardsResponse(unittest.TestCase):
+     """V1DeepCompareLeaderboardsResponse unit test stubs"""
+
+     def setUp(self):
+         pass
+
+     def tearDown(self):
+         pass
+
+     def make_instance(self, include_optional) -> V1DeepCompareLeaderboardsResponse:
+         """Test V1DeepCompareLeaderboardsResponse
+             include_option is a boolean, when False only required
+             params are included, when True both required and
+             optional params are included """
+         # uncomment below to create an instance of `V1DeepCompareLeaderboardsResponse`
+         """
+         model = V1DeepCompareLeaderboardsResponse()
+         if include_optional:
+             return V1DeepCompareLeaderboardsResponse(
+                 leaderboard_cmp_report = eval_studio_client.api.models.v1_leaderboard_cmp_report.v1LeaderboardCmpReport(
+                     summary = '',
+                     comparison_result = eval_studio_client.api.models.complete_comparison_result_structure.Complete comparison result structure(
+                         diffs = [
+                             eval_studio_client.api.models.a_single_diff_item_comparing_two_leaderboards.A single diff item comparing two leaderboards(
+                                 diff_key = '',
+                                 items = [
+                                     eval_studio_client.api.models.a_single_comparison_item_showing_differences_between_baseline_and_current.A single comparison item showing differences between baseline and current(
+                                         question = '',
+                                         diff_flipped_metrics = [
+                                             eval_studio_client.api.models.flipped_metric_information.Flipped metric information(
+                                                 metric_name = '',
+                                                 baseline_value = 1.337,
+                                                 current_value = 1.337, )
+                                         ],
+                                         baseline_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
+                                             key = '',
+                                             input = '',
+                                             corpus = [
+                                                 ''
+                                             ],
+                                             context = [
+                                                 ''
+                                             ],
+                                             categories = [
+                                                 ''
+                                             ],
+                                             relationships = [
+                                                 eval_studio_client.api.models.test_case_relationship_information.Test case relationship information(
+                                                     type = '',
+                                                     target = '',
+                                                     target_type = '', )
+                                             ],
+                                             expected_output = '',
+                                             output_constraints = [
+                                                 ''
+                                             ],
+                                             output_condition = '',
+                                             actual_output = '',
+                                             actual_duration = 1.337,
+                                             cost = 1.337,
+                                             model_key = '',
+                                             test_key = '',
+                                             test_case_key = '',
+                                             metrics = [
+                                                 eval_studio_client.api.models.metric_information.Metric information(
+                                                     key = '',
+                                                     value = 1.337, )
+                                             ],
+                                             metrics_meta = {
+                                                 'key' : ''
+                                             },
+                                             actual_output_meta = [
+                                                 eval_studio_client.api.models.actual_output_metadata.Actual output metadata(
+                                                     tokenization = '',
+                                                     data = [
+                                                         eval_studio_client.api.models.data_fragment.Data fragment(
+                                                             text = '',
+                                                             meta = {
+                                                                 'key' : ''
+                                                             }, )
+                                                     ], )
+                                             ],
+                                             metric_scores = [
+                                                 eval_studio_client.api.models.comparison_metric_score_information_(specific_to_comparison_reports).Comparison metric score information (specific to comparison reports)(
+                                                     metric_name = '',
+                                                     metric_score = 1.337, )
+                                             ],
+                                             result_error_message = '', ),
+                                         baseline_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
+                                             sentences = [
+                                                 ''
+                                             ],
+                                             sentences_count = 56,
+                                             common_sentences = [
+                                                 ''
+                                             ],
+                                             common_count = 56,
+                                             unique_sentences = [
+                                                 ''
+                                             ],
+                                             unique_count = 56,
+                                             identical = True,
+                                             sentence_similarity = {
+                                                 'key' : 1.337
+                                             }, ),
+                                         baseline_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
+                                             chunks = [
+                                                 ''
+                                             ],
+                                             chunks_count = 56,
+                                             common_chunks = [
+                                                 ''
+                                             ],
+                                             common_count = 56,
+                                             unique_chunks = [
+                                                 ''
+                                             ],
+                                             unique_count = 56,
+                                             identical = True,
+                                             chunk_similarity = {
+                                                 'key' : 1.337
+                                             }, ),
+                                         current_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
+                                             key = '',
+                                             input = '',
+                                             expected_output = '',
+                                             output_condition = '',
+                                             actual_output = '',
+                                             actual_duration = 1.337,
+                                             cost = 1.337,
+                                             model_key = '',
+                                             test_key = '',
+                                             test_case_key = '',
+                                             result_error_message = '', ),
+                                         current_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
+                                             sentences_count = 56,
+                                             common_count = 56,
+                                             unique_count = 56,
+                                             identical = True, ),
+                                         current_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
+                                             chunks_count = 56,
+                                             common_count = 56,
+                                             unique_count = 56,
+                                             identical = True, ), )
+                                 ],
+                                 summary = eval_studio_client.api.models.comparison_summary.Comparison summary(
+                                     recommendation_winner = '',
+                                     recommendation = '',
+                                     recommendation_confidence = '', ),
+                                 models_overview = eval_studio_client.api.models.models_overview.Models overview(
+                                     baseline_model_key = '',
+                                     current_model_key = '',
+                                     baseline_model_name = '',
+                                     baseline_collection_id = [
+                                         ''
+                                     ],
+                                     current_model_name = '',
+                                     current_collection_id = [
+                                         ''
+                                     ], ),
+                                 models_comparisons = eval_studio_client.api.models.models_comparison_statistics.Models comparison statistics(
+                                     test_case_ranks_baseline = 56,
+                                     test_case_ranks_current = 56,
+                                     test_case_wins_baseline = 56,
+                                     test_case_wins_current = 56, ),
+                                 models_comparisons_metrics = eval_studio_client.api.models.detailed_metrics_comparisons.Detailed metrics comparisons(
+                                     metrics_ranks_baseline = 1.337,
+                                     metrics_ranks_current = 1.337,
+                                     metrics_wins_baseline = 56,
+                                     metrics_wins_current = 56,
+                                     metrics_averages = [
+                                         eval_studio_client.api.models.metric_average_comparison.Metric average comparison(
+                                             metric_key = '',
+                                             baseline_avg = 1.337,
+                                             current_avg = 1.337,
+                                             diff = 1.337,
+                                             baseline_better_wins = 56,
+                                             current_better_wins = 56,
+                                             baseline_rank_avg = 1.337,
+                                             current_rank_avg = 1.337, )
+                                     ], ),
+                                 technical_metrics = eval_studio_client.api.models.technical_metrics_for_model_performance.Technical metrics for model performance(
+                                     baseline = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
+                                         cost_sum = 1.337,
+                                         duration_sum = 1.337,
+                                         duration_min = 1.337,
+                                         duration_max = 1.337,
+                                         duration_avg = 1.337, ),
+                                     current = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
+                                         cost_sum = 1.337,
+                                         duration_sum = 1.337,
+                                         duration_min = 1.337,
+                                         duration_max = 1.337,
+                                         duration_avg = 1.337, ), ),
+                                 test_cases_leaderboard = [
+                                     eval_studio_client.api.models.test_case_leaderboard_item.Test case leaderboard item(
+                                         wins = 56,
+                                         question = '',
+                                         changed_metrics_count = 56, )
+                                 ], )
+                         ],
+                         leaderboards = [
+                             eval_studio_client.api.models.leaderboard_information.Leaderboard information(
+                                 key = '', )
+                         ],
+                         metrics_meta = {
+                             'key' : eval_studio_client.api.models.metric_metadata.Metric metadata(
+                                 key = '',
+                                 display_name = '',
+                                 data_type = '',
+                                 display_value = '',
+                                 description = '',
+                                 value_range = [
+                                     1.337
+                                 ],
+                                 value_enum = [
+                                     ''
+                                 ],
+                                 higher_is_better = True,
+                                 threshold = 1.337,
+                                 is_primary_metric = True,
+                                 parent_metric = '',
+                                 exclude = True, )
+                         }, ), )
+             )
+         else:
+             return V1DeepCompareLeaderboardsResponse(
+         )
+         """
+
+     def testV1DeepCompareLeaderboardsResponse(self):
+         """Test V1DeepCompareLeaderboardsResponse"""
+         # inst_req_only = self.make_instance(include_optional=False)
+         # inst_req_and_optional = self.make_instance(include_optional=True)
+
+ if __name__ == '__main__':
+     unittest.main()
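Given the nesting shown in the sample above (leaderboard_cmp_report → comparison_result → diffs → items), here is a hedged sketch of walking a deep-compare response; attribute access is assumed to follow the usual generated-model conventions, and none of this is verified against 1.3.0a1.

def print_flipped_metrics(resp) -> None:
    """Print every metric that flipped between baseline and current (sketch)."""
    report = resp.leaderboard_cmp_report
    if report is None or report.comparison_result is None:
        return
    for diff in report.comparison_result.diffs or []:
        for item in diff.items or []:
            for flipped in item.diff_flipped_metrics or []:
                print(f"{flipped.metric_name}: "
                      f"{flipped.baseline_value} -> {flipped.current_value}")
    summary = report.comparison_result.summary
    if summary is not None:
        print("recommended winner:", summary.recommendation_winner)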
eval_studio_client/api/test/test_v1_delete_leaderboard_response.py
@@ -98,7 +98,8 @@ class TestV1DeleteLeaderboardResponse(unittest.TestCase):
                      h2ogpte_collection = '',
                      type = 'LEADERBOARD_TYPE_UNSPECIFIED',
                      demo = True,
-                     test_lab = '', )
+                     test_lab = '',
+                     evaluation_type = 'EVALUATION_TYPE_UNSPECIFIED', )
              )
          else:
              return V1DeleteLeaderboardResponse(
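The hunk above reflects the new evaluation_type field on V1Leaderboard (backed by the V1EvaluationType enum added in this release). A hedged sketch; the other constructor arguments are omitted here and are assumed optional, as in the generated stubs.

from eval_studio_client.api.models.v1_leaderboard import V1Leaderboard

# evaluation_type and its EVALUATION_TYPE_UNSPECIFIED default come straight
# from this diff; every other field is left at its generated default here.
leaderboard = V1Leaderboard(
    evaluation_type="EVALUATION_TYPE_UNSPECIFIED",
)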