eval-studio-client 1.2.5__py3-none-any.whl → 1.3.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. eval_studio_client/api/__init__.py +65 -0
  2. eval_studio_client/api/api/__init__.py +3 -0
  3. eval_studio_client/api/api/dashboard_report_service_api.py +292 -0
  4. eval_studio_client/api/api/dashboard_service_api.py +16 -16
  5. eval_studio_client/api/api/dashboard_test_case_annotation_service_api.py +611 -0
  6. eval_studio_client/api/api/document_service_api.py +16 -16
  7. eval_studio_client/api/api/evaluation_service_api.py +12 -12
  8. eval_studio_client/api/api/evaluator_service_api.py +16 -16
  9. eval_studio_client/api/api/leaderboard_report_service_api.py +304 -17
  10. eval_studio_client/api/api/leaderboard_service_api.py +554 -16
  11. eval_studio_client/api/api/leaderboard_test_case_annotation_service_api.py +611 -0
  12. eval_studio_client/api/api/model_service_api.py +16 -16
  13. eval_studio_client/api/api/operation_service_api.py +821 -17
  14. eval_studio_client/api/api/perturbator_service_api.py +22 -22
  15. eval_studio_client/api/api/test_case_service_api.py +300 -16
  16. eval_studio_client/api/api/test_class_service_api.py +16 -16
  17. eval_studio_client/api/api/test_service_api.py +285 -16
  18. eval_studio_client/api/api/workflow_node_service_api.py +16 -16
  19. eval_studio_client/api/api/workflow_service_api.py +16 -16
  20. eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +2 -1
  21. eval_studio_client/api/docs/DashboardReportServiceApi.md +75 -0
  22. eval_studio_client/api/docs/DashboardServiceApi.md +5 -5
  23. eval_studio_client/api/docs/DashboardTestCaseAnnotationServiceApi.md +149 -0
  24. eval_studio_client/api/docs/DocumentServiceApi.md +5 -5
  25. eval_studio_client/api/docs/EvaluationServiceApi.md +4 -4
  26. eval_studio_client/api/docs/EvaluatorServiceApi.md +5 -5
  27. eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -5
  28. eval_studio_client/api/docs/LeaderboardServiceApi.md +141 -5
  29. eval_studio_client/api/docs/LeaderboardTestCaseAnnotationServiceApi.md +149 -0
  30. eval_studio_client/api/docs/ModelServiceApi.md +5 -5
  31. eval_studio_client/api/docs/OperationServiceApi.md +215 -8
  32. eval_studio_client/api/docs/PerturbatorServiceApi.md +7 -7
  33. eval_studio_client/api/docs/RequiredTheDashboardTestCaseAnnotationToUpdate.md +35 -0
  34. eval_studio_client/api/docs/RequiredTheLeaderboardTestCaseAnnotationToUpdate.md +35 -0
  35. eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +1 -0
  36. eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +1 -0
  37. eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +1 -0
  38. eval_studio_client/api/docs/TestCaseServiceApi.md +75 -5
  39. eval_studio_client/api/docs/TestCaseServiceAppendTestCasesRequest.md +30 -0
  40. eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
  41. eval_studio_client/api/docs/TestServiceApi.md +73 -5
  42. eval_studio_client/api/docs/V1ActualOutputMeta.md +30 -0
  43. eval_studio_client/api/docs/V1ActualOutputMetaDiff.md +36 -0
  44. eval_studio_client/api/docs/V1AgentChatActivityDiagram.md +31 -0
  45. eval_studio_client/api/docs/V1AgentChatActivityDiagramEdge.md +32 -0
  46. eval_studio_client/api/docs/V1AgentChatActivityDiagramNode.md +32 -0
  47. eval_studio_client/api/docs/V1AgentChatActivityDiagramRow.md +30 -0
  48. eval_studio_client/api/docs/V1AgentChatScriptUsage.md +33 -0
  49. eval_studio_client/api/docs/V1AgentChatScriptsBarChart.md +30 -0
  50. eval_studio_client/api/docs/V1AgentChatToolUsage.md +33 -0
  51. eval_studio_client/api/docs/V1AgentChatToolsBarChart.md +30 -0
  52. eval_studio_client/api/docs/V1AllMetricScores.md +29 -0
  53. eval_studio_client/api/docs/V1AppendTestCasesResponse.md +29 -0
  54. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheRequest.md +31 -0
  55. eval_studio_client/api/docs/V1BatchCreateLeaderboardsWithoutCacheResponse.md +29 -0
  56. eval_studio_client/api/docs/V1BatchMarkOperationSeenByCreatorResponse.md +29 -0
  57. eval_studio_client/api/docs/V1CmpLeaderboardReportsRequest.md +33 -0
  58. eval_studio_client/api/docs/V1CmpLeaderboardReportsResponse.md +29 -0
  59. eval_studio_client/api/docs/V1ComparisonItem.md +36 -0
  60. eval_studio_client/api/docs/V1ComparisonMetricScore.md +30 -0
  61. eval_studio_client/api/docs/V1ComparisonResult.md +31 -0
  62. eval_studio_client/api/docs/V1ComparisonSummary.md +31 -0
  63. eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
  64. eval_studio_client/api/docs/V1CreateTestFromTestCasesRequest.md +32 -0
  65. eval_studio_client/api/docs/V1CreateTestFromTestCasesResponse.md +29 -0
  66. eval_studio_client/api/docs/V1DashboardReport.md +31 -0
  67. eval_studio_client/api/docs/V1DashboardReportResult.md +39 -0
  68. eval_studio_client/api/docs/V1DashboardTestCaseAnnotation.md +36 -0
  69. eval_studio_client/api/docs/V1DataFragment.md +31 -0
  70. eval_studio_client/api/docs/V1DeepCompareLeaderboardsRequest.md +33 -0
  71. eval_studio_client/api/docs/V1DeepCompareLeaderboardsResponse.md +29 -0
  72. eval_studio_client/api/docs/V1DiffItem.md +36 -0
  73. eval_studio_client/api/docs/V1EvaluationType.md +12 -0
  74. eval_studio_client/api/docs/V1FlippedMetric.md +31 -0
  75. eval_studio_client/api/docs/V1GetDashboardReportResponse.md +29 -0
  76. eval_studio_client/api/docs/V1HumanDecision.md +12 -0
  77. eval_studio_client/api/docs/V1Info.md +1 -0
  78. eval_studio_client/api/docs/V1Leaderboard.md +1 -0
  79. eval_studio_client/api/docs/V1LeaderboardCmpReport.md +30 -0
  80. eval_studio_client/api/docs/V1LeaderboardComparisonItem.md +31 -0
  81. eval_studio_client/api/docs/V1LeaderboardInfo.md +30 -0
  82. eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +6 -3
  83. eval_studio_client/api/docs/V1LeaderboardReportResult.md +11 -8
  84. eval_studio_client/api/docs/V1LeaderboardReportResultView.md +12 -0
  85. eval_studio_client/api/docs/V1LeaderboardTestCaseAnnotation.md +36 -0
  86. eval_studio_client/api/docs/V1ListDashboardTestCaseAnnotationsResponse.md +29 -0
  87. eval_studio_client/api/docs/V1ListLeaderboardTestCaseAnnotationsResponse.md +29 -0
  88. eval_studio_client/api/docs/V1ListOperationsResponse.md +1 -0
  89. eval_studio_client/api/docs/V1ListUnseenOperationsResponse.md +30 -0
  90. eval_studio_client/api/docs/V1MarkOperationSeenByCreatorResponse.md +29 -0
  91. eval_studio_client/api/docs/V1Metric.md +30 -0
  92. eval_studio_client/api/docs/V1MetricAverage.md +36 -0
  93. eval_studio_client/api/docs/V1MetricMeta.md +40 -0
  94. eval_studio_client/api/docs/V1MetricScore.md +1 -1
  95. eval_studio_client/api/docs/V1MetricScores.md +1 -1
  96. eval_studio_client/api/docs/V1ModelType.md +1 -1
  97. eval_studio_client/api/docs/V1ModelsComparisons.md +32 -0
  98. eval_studio_client/api/docs/V1ModelsComparisonsMetrics.md +33 -0
  99. eval_studio_client/api/docs/V1ModelsOverview.md +34 -0
  100. eval_studio_client/api/docs/V1Operation.md +1 -0
  101. eval_studio_client/api/docs/V1OperationView.md +12 -0
  102. eval_studio_client/api/docs/V1RetrievedContextDiff.md +36 -0
  103. eval_studio_client/api/docs/V1Stats.md +2 -0
  104. eval_studio_client/api/docs/V1TechnicalMetrics.md +30 -0
  105. eval_studio_client/api/docs/V1TechnicalMetricsDetail.md +33 -0
  106. eval_studio_client/api/docs/V1TestCaseLeaderboardItem.md +31 -0
  107. eval_studio_client/api/docs/V1TestCaseRelationshipInfo.md +31 -0
  108. eval_studio_client/api/docs/V1TestCaseResult.md +48 -0
  109. eval_studio_client/api/docs/V1TextSimilarityMetric.md +12 -0
  110. eval_studio_client/api/docs/V1UpdateDashboardTestCaseAnnotationResponse.md +29 -0
  111. eval_studio_client/api/docs/V1UpdateLeaderboardTestCaseAnnotationResponse.md +29 -0
  112. eval_studio_client/api/docs/WorkflowNodeServiceApi.md +5 -5
  113. eval_studio_client/api/docs/WorkflowServiceApi.md +5 -5
  114. eval_studio_client/api/models/__init__.py +62 -0
  115. eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +17 -2
  116. eval_studio_client/api/models/required_the_dashboard_test_case_annotation_to_update.py +108 -0
  117. eval_studio_client/api/models/required_the_leaderboard_test_case_annotation_to_update.py +108 -0
  118. eval_studio_client/api/models/required_the_leaderboard_to_update.py +5 -2
  119. eval_studio_client/api/models/required_the_operation_to_finalize.py +6 -2
  120. eval_studio_client/api/models/required_the_operation_to_update.py +6 -2
  121. eval_studio_client/api/models/test_case_service_append_test_cases_request.py +89 -0
  122. eval_studio_client/api/models/v1_actual_output_meta.py +97 -0
  123. eval_studio_client/api/models/v1_actual_output_meta_diff.py +101 -0
  124. eval_studio_client/api/models/v1_agent_chat_activity_diagram.py +109 -0
  125. eval_studio_client/api/models/v1_agent_chat_activity_diagram_edge.py +97 -0
  126. eval_studio_client/api/models/v1_agent_chat_activity_diagram_node.py +97 -0
  127. eval_studio_client/api/models/v1_agent_chat_activity_diagram_row.py +97 -0
  128. eval_studio_client/api/models/v1_agent_chat_script_usage.py +101 -0
  129. eval_studio_client/api/models/v1_agent_chat_scripts_bar_chart.py +102 -0
  130. eval_studio_client/api/models/v1_agent_chat_tool_usage.py +101 -0
  131. eval_studio_client/api/models/v1_agent_chat_tools_bar_chart.py +102 -0
  132. eval_studio_client/api/models/v1_all_metric_scores.py +87 -0
  133. eval_studio_client/api/models/v1_append_test_cases_response.py +95 -0
  134. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_request.py +99 -0
  135. eval_studio_client/api/models/v1_batch_create_leaderboards_without_cache_response.py +91 -0
  136. eval_studio_client/api/models/v1_batch_mark_operation_seen_by_creator_response.py +95 -0
  137. eval_studio_client/api/models/v1_cmp_leaderboard_reports_request.py +96 -0
  138. eval_studio_client/api/models/v1_cmp_leaderboard_reports_response.py +91 -0
  139. eval_studio_client/api/models/v1_comparison_item.py +130 -0
  140. eval_studio_client/api/models/v1_comparison_metric_score.py +89 -0
  141. eval_studio_client/api/models/v1_comparison_result.py +120 -0
  142. eval_studio_client/api/models/v1_comparison_summary.py +91 -0
  143. eval_studio_client/api/models/v1_create_evaluation_request.py +5 -2
  144. eval_studio_client/api/models/v1_create_test_from_test_cases_request.py +93 -0
  145. eval_studio_client/api/models/v1_create_test_from_test_cases_response.py +91 -0
  146. eval_studio_client/api/models/v1_dashboard_report.py +109 -0
  147. eval_studio_client/api/models/v1_dashboard_report_result.py +139 -0
  148. eval_studio_client/api/models/v1_dashboard_test_case_annotation.py +112 -0
  149. eval_studio_client/api/models/v1_data_fragment.py +91 -0
  150. eval_studio_client/api/models/v1_deep_compare_leaderboards_request.py +96 -0
  151. eval_studio_client/api/models/v1_deep_compare_leaderboards_response.py +91 -0
  152. eval_studio_client/api/models/v1_diff_item.py +137 -0
  153. eval_studio_client/api/models/v1_evaluation_type.py +39 -0
  154. eval_studio_client/api/models/v1_flipped_metric.py +91 -0
  155. eval_studio_client/api/models/v1_get_dashboard_report_response.py +91 -0
  156. eval_studio_client/api/models/v1_human_decision.py +38 -0
  157. eval_studio_client/api/models/v1_info.py +4 -2
  158. eval_studio_client/api/models/v1_leaderboard.py +5 -2
  159. eval_studio_client/api/models/v1_leaderboard_cmp_report.py +93 -0
  160. eval_studio_client/api/models/v1_leaderboard_comparison_item.py +91 -0
  161. eval_studio_client/api/models/v1_leaderboard_info.py +97 -0
  162. eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +23 -9
  163. eval_studio_client/api/models/v1_leaderboard_report_result.py +21 -10
  164. eval_studio_client/api/models/v1_leaderboard_report_result_view.py +38 -0
  165. eval_studio_client/api/models/v1_leaderboard_test_case_annotation.py +112 -0
  166. eval_studio_client/api/models/v1_list_dashboard_test_case_annotations_response.py +95 -0
  167. eval_studio_client/api/models/v1_list_leaderboard_test_case_annotations_response.py +95 -0
  168. eval_studio_client/api/models/v1_list_operations_response.py +5 -3
  169. eval_studio_client/api/models/v1_list_unseen_operations_response.py +97 -0
  170. eval_studio_client/api/models/v1_mark_operation_seen_by_creator_response.py +91 -0
  171. eval_studio_client/api/models/v1_metric.py +89 -0
  172. eval_studio_client/api/models/v1_metric_average.py +101 -0
  173. eval_studio_client/api/models/v1_metric_meta.py +109 -0
  174. eval_studio_client/api/models/v1_metric_score.py +6 -1
  175. eval_studio_client/api/models/v1_metric_scores.py +1 -1
  176. eval_studio_client/api/models/v1_model_type.py +2 -1
  177. eval_studio_client/api/models/v1_models_comparisons.py +93 -0
  178. eval_studio_client/api/models/v1_models_comparisons_metrics.py +103 -0
  179. eval_studio_client/api/models/v1_models_overview.py +97 -0
  180. eval_studio_client/api/models/v1_operation.py +6 -2
  181. eval_studio_client/api/models/v1_operation_view.py +38 -0
  182. eval_studio_client/api/models/v1_retrieved_context_diff.py +101 -0
  183. eval_studio_client/api/models/v1_stats.py +16 -2
  184. eval_studio_client/api/models/v1_technical_metrics.py +96 -0
  185. eval_studio_client/api/models/v1_technical_metrics_detail.py +95 -0
  186. eval_studio_client/api/models/v1_test_case_leaderboard_item.py +91 -0
  187. eval_studio_client/api/models/v1_test_case_relationship_info.py +91 -0
  188. eval_studio_client/api/models/v1_test_case_result.py +157 -0
  189. eval_studio_client/api/models/v1_text_similarity_metric.py +39 -0
  190. eval_studio_client/api/models/v1_update_dashboard_test_case_annotation_response.py +91 -0
  191. eval_studio_client/api/models/v1_update_leaderboard_test_case_annotation_response.py +91 -0
  192. eval_studio_client/api/models/v1_workflow_node_type.py +1 -0
  193. eval_studio_client/api/models/v1_workflow_type.py +1 -0
  194. eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +6 -0
  195. eval_studio_client/api/test/test_dashboard_report_service_api.py +37 -0
  196. eval_studio_client/api/test/test_dashboard_test_case_annotation_service_api.py +43 -0
  197. eval_studio_client/api/test/test_leaderboard_report_service_api.py +6 -0
  198. eval_studio_client/api/test/test_leaderboard_service_api.py +12 -0
  199. eval_studio_client/api/test/test_leaderboard_test_case_annotation_service_api.py +43 -0
  200. eval_studio_client/api/test/test_operation_service_api.py +18 -0
  201. eval_studio_client/api/test/test_required_the_dashboard_test_case_annotation_to_update.py +57 -0
  202. eval_studio_client/api/test/test_required_the_leaderboard_test_case_annotation_to_update.py +57 -0
  203. eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +2 -1
  204. eval_studio_client/api/test/test_required_the_operation_to_finalize.py +2 -1
  205. eval_studio_client/api/test/test_required_the_operation_to_update.py +2 -1
  206. eval_studio_client/api/test/test_test_case_service_api.py +6 -0
  207. eval_studio_client/api/test/test_test_case_service_append_test_cases_request.py +52 -0
  208. eval_studio_client/api/test/test_test_service_api.py +6 -0
  209. eval_studio_client/api/test/test_v1_abort_operation_response.py +2 -1
  210. eval_studio_client/api/test/test_v1_actual_output_meta.py +61 -0
  211. eval_studio_client/api/test/test_v1_actual_output_meta_diff.py +66 -0
  212. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram.py +65 -0
  213. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_edge.py +53 -0
  214. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_node.py +53 -0
  215. eval_studio_client/api/test/test_v1_agent_chat_activity_diagram_row.py +56 -0
  216. eval_studio_client/api/test/test_v1_agent_chat_script_usage.py +54 -0
  217. eval_studio_client/api/test/test_v1_agent_chat_scripts_bar_chart.py +57 -0
  218. eval_studio_client/api/test/test_v1_agent_chat_tool_usage.py +54 -0
  219. eval_studio_client/api/test/test_v1_agent_chat_tools_bar_chart.py +57 -0
  220. eval_studio_client/api/test/test_v1_all_metric_scores.py +53 -0
  221. eval_studio_client/api/test/test_v1_append_test_cases_response.py +74 -0
  222. eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +2 -1
  223. eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +2 -1
  224. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_request.py +120 -0
  225. eval_studio_client/api/test/test_v1_batch_create_leaderboards_without_cache_response.py +72 -0
  226. eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +2 -1
  227. eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +2 -1
  228. eval_studio_client/api/test/test_v1_batch_get_operations_response.py +2 -1
  229. eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +2 -1
  230. eval_studio_client/api/test/test_v1_batch_mark_operation_seen_by_creator_response.py +74 -0
  231. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_request.py +55 -0
  232. eval_studio_client/api/test/test_v1_cmp_leaderboard_reports_response.py +255 -0
  233. eval_studio_client/api/test/test_v1_comparison_item.py +233 -0
  234. eval_studio_client/api/test/test_v1_comparison_metric_score.py +52 -0
  235. eval_studio_client/api/test/test_v1_comparison_result.py +258 -0
  236. eval_studio_client/api/test/test_v1_comparison_summary.py +53 -0
  237. eval_studio_client/api/test/test_v1_create_evaluation_request.py +2 -1
  238. eval_studio_client/api/test/test_v1_create_leaderboard_request.py +2 -1
  239. eval_studio_client/api/test/test_v1_create_leaderboard_response.py +2 -1
  240. eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +2 -1
  241. eval_studio_client/api/test/test_v1_create_test_from_test_cases_request.py +54 -0
  242. eval_studio_client/api/test/test_v1_create_test_from_test_cases_response.py +68 -0
  243. eval_studio_client/api/test/test_v1_dashboard_report.py +142 -0
  244. eval_studio_client/api/test/test_v1_dashboard_report_result.py +72 -0
  245. eval_studio_client/api/test/test_v1_dashboard_test_case_annotation.py +58 -0
  246. eval_studio_client/api/test/test_v1_data_fragment.py +57 -0
  247. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_request.py +55 -0
  248. eval_studio_client/api/test/test_v1_deep_compare_leaderboards_response.py +255 -0
  249. eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +2 -1
  250. eval_studio_client/api/test/test_v1_diff_item.py +226 -0
  251. eval_studio_client/api/test/test_v1_evaluation_type.py +33 -0
  252. eval_studio_client/api/test/test_v1_finalize_operation_response.py +2 -1
  253. eval_studio_client/api/test/test_v1_flipped_metric.py +53 -0
  254. eval_studio_client/api/test/test_v1_generate_test_cases_response.py +2 -1
  255. eval_studio_client/api/test/test_v1_get_dashboard_report_response.py +143 -0
  256. eval_studio_client/api/test/test_v1_get_info_response.py +4 -1
  257. eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +39 -2
  258. eval_studio_client/api/test/test_v1_get_leaderboard_response.py +2 -1
  259. eval_studio_client/api/test/test_v1_get_operation_response.py +2 -1
  260. eval_studio_client/api/test/test_v1_get_stats_response.py +3 -1
  261. eval_studio_client/api/test/test_v1_human_decision.py +33 -0
  262. eval_studio_client/api/test/test_v1_import_leaderboard_response.py +2 -1
  263. eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +2 -1
  264. eval_studio_client/api/test/test_v1_info.py +4 -1
  265. eval_studio_client/api/test/test_v1_leaderboard.py +2 -1
  266. eval_studio_client/api/test/test_v1_leaderboard_cmp_report.py +254 -0
  267. eval_studio_client/api/test/test_v1_leaderboard_comparison_item.py +53 -0
  268. eval_studio_client/api/test/test_v1_leaderboard_info.py +57 -0
  269. eval_studio_client/api/test/test_v1_leaderboard_report.py +39 -2
  270. eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +33 -1
  271. eval_studio_client/api/test/test_v1_leaderboard_report_result.py +39 -2
  272. eval_studio_client/api/test/test_v1_leaderboard_report_result_view.py +33 -0
  273. eval_studio_client/api/test/test_v1_leaderboard_test_case_annotation.py +58 -0
  274. eval_studio_client/api/test/test_v1_list_dashboard_test_case_annotations_response.py +61 -0
  275. eval_studio_client/api/test/test_v1_list_leaderboard_test_case_annotations_response.py +61 -0
  276. eval_studio_client/api/test/test_v1_list_leaderboards_response.py +2 -1
  277. eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +2 -1
  278. eval_studio_client/api/test/test_v1_list_operations_response.py +4 -2
  279. eval_studio_client/api/test/test_v1_list_unseen_operations_response.py +75 -0
  280. eval_studio_client/api/test/test_v1_mark_operation_seen_by_creator_response.py +72 -0
  281. eval_studio_client/api/test/test_v1_metric.py +52 -0
  282. eval_studio_client/api/test/test_v1_metric_average.py +58 -0
  283. eval_studio_client/api/test/test_v1_metric_meta.py +66 -0
  284. eval_studio_client/api/test/test_v1_models_comparisons.py +54 -0
  285. eval_studio_client/api/test/test_v1_models_comparisons_metrics.py +65 -0
  286. eval_studio_client/api/test/test_v1_models_overview.py +60 -0
  287. eval_studio_client/api/test/test_v1_operation.py +2 -1
  288. eval_studio_client/api/test/test_v1_operation_view.py +33 -0
  289. eval_studio_client/api/test/test_v1_process_workflow_node_response.py +2 -1
  290. eval_studio_client/api/test/test_v1_retrieved_context_diff.py +66 -0
  291. eval_studio_client/api/test/test_v1_stats.py +3 -1
  292. eval_studio_client/api/test/test_v1_technical_metrics.py +62 -0
  293. eval_studio_client/api/test/test_v1_technical_metrics_detail.py +55 -0
  294. eval_studio_client/api/test/test_v1_test_case_leaderboard_item.py +53 -0
  295. eval_studio_client/api/test/test_v1_test_case_relationship_info.py +53 -0
  296. eval_studio_client/api/test/test_v1_test_case_result.py +106 -0
  297. eval_studio_client/api/test/test_v1_text_similarity_metric.py +33 -0
  298. eval_studio_client/api/test/test_v1_update_dashboard_test_case_annotation_response.py +59 -0
  299. eval_studio_client/api/test/test_v1_update_leaderboard_response.py +2 -1
  300. eval_studio_client/api/test/test_v1_update_leaderboard_test_case_annotation_response.py +59 -0
  301. eval_studio_client/api/test/test_v1_update_operation_response.py +2 -1
  302. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +2340 -210
  303. eval_studio_client/models.py +18 -6
  304. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/METADATA +2 -2
  305. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/RECORD +306 -111
  306. {eval_studio_client-1.2.5.dist-info → eval_studio_client-1.3.0a1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,255 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ ai/h2o/eval_studio/v1/insight.proto
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: version not set
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from eval_studio_client.api.models.v1_cmp_leaderboard_reports_response import V1CmpLeaderboardReportsResponse
18
+
19
+ class TestV1CmpLeaderboardReportsResponse(unittest.TestCase):
20
+ """V1CmpLeaderboardReportsResponse unit test stubs"""
21
+
22
+ def setUp(self):
23
+ pass
24
+
25
+ def tearDown(self):
26
+ pass
27
+
28
+ def make_instance(self, include_optional) -> V1CmpLeaderboardReportsResponse:
29
+ """Test V1CmpLeaderboardReportsResponse
30
+ include_option is a boolean, when False only required
31
+ params are included, when True both required and
32
+ optional params are included """
33
+ # uncomment below to create an instance of `V1CmpLeaderboardReportsResponse`
34
+ """
35
+ model = V1CmpLeaderboardReportsResponse()
36
+ if include_optional:
37
+ return V1CmpLeaderboardReportsResponse(
38
+ leaderboard_cmp_report = eval_studio_client.api.models.v1_leaderboard_cmp_report.v1LeaderboardCmpReport(
39
+ summary = '',
40
+ comparison_result = eval_studio_client.api.models.complete_comparison_result_structure.Complete comparison result structure(
41
+ diffs = [
42
+ eval_studio_client.api.models.a_single_diff_item_comparing_two_leaderboards.A single diff item comparing two leaderboards(
43
+ diff_key = '',
44
+ items = [
45
+ eval_studio_client.api.models.a_single_comparison_item_showing_differences_between_baseline_and_current.A single comparison item showing differences between baseline and current(
46
+ question = '',
47
+ diff_flipped_metrics = [
48
+ eval_studio_client.api.models.flipped_metric_information.Flipped metric information(
49
+ metric_name = '',
50
+ baseline_value = 1.337,
51
+ current_value = 1.337, )
52
+ ],
53
+ baseline_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
54
+ key = '',
55
+ input = '',
56
+ corpus = [
57
+ ''
58
+ ],
59
+ context = [
60
+ ''
61
+ ],
62
+ categories = [
63
+ ''
64
+ ],
65
+ relationships = [
66
+ eval_studio_client.api.models.test_case_relationship_information.Test case relationship information(
67
+ type = '',
68
+ target = '',
69
+ target_type = '', )
70
+ ],
71
+ expected_output = '',
72
+ output_constraints = [
73
+ ''
74
+ ],
75
+ output_condition = '',
76
+ actual_output = '',
77
+ actual_duration = 1.337,
78
+ cost = 1.337,
79
+ model_key = '',
80
+ test_key = '',
81
+ test_case_key = '',
82
+ metrics = [
83
+ eval_studio_client.api.models.metric_information.Metric information(
84
+ key = '',
85
+ value = 1.337, )
86
+ ],
87
+ metrics_meta = {
88
+ 'key' : ''
89
+ },
90
+ actual_output_meta = [
91
+ eval_studio_client.api.models.actual_output_metadata.Actual output metadata(
92
+ tokenization = '',
93
+ data = [
94
+ eval_studio_client.api.models.data_fragment.Data fragment(
95
+ text = '',
96
+ meta = {
97
+ 'key' : ''
98
+ }, )
99
+ ], )
100
+ ],
101
+ metric_scores = [
102
+ eval_studio_client.api.models.comparison_metric_score_information_(specific_to_comparison_reports).Comparison metric score information (specific to comparison reports)(
103
+ metric_name = '',
104
+ metric_score = 1.337, )
105
+ ],
106
+ result_error_message = '', ),
107
+ baseline_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
108
+ sentences = [
109
+ ''
110
+ ],
111
+ sentences_count = 56,
112
+ common_sentences = [
113
+ ''
114
+ ],
115
+ common_count = 56,
116
+ unique_sentences = [
117
+ ''
118
+ ],
119
+ unique_count = 56,
120
+ identical = True,
121
+ sentence_similarity = {
122
+ 'key' : 1.337
123
+ }, ),
124
+ baseline_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
125
+ chunks = [
126
+ ''
127
+ ],
128
+ chunks_count = 56,
129
+ common_chunks = [
130
+ ''
131
+ ],
132
+ common_count = 56,
133
+ unique_chunks = [
134
+ ''
135
+ ],
136
+ unique_count = 56,
137
+ identical = True,
138
+ chunk_similarity = {
139
+ 'key' : 1.337
140
+ }, ),
141
+ current_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
142
+ key = '',
143
+ input = '',
144
+ expected_output = '',
145
+ output_condition = '',
146
+ actual_output = '',
147
+ actual_duration = 1.337,
148
+ cost = 1.337,
149
+ model_key = '',
150
+ test_key = '',
151
+ test_case_key = '',
152
+ result_error_message = '', ),
153
+ current_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
154
+ sentences_count = 56,
155
+ common_count = 56,
156
+ unique_count = 56,
157
+ identical = True, ),
158
+ current_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
159
+ chunks_count = 56,
160
+ common_count = 56,
161
+ unique_count = 56,
162
+ identical = True, ), )
163
+ ],
164
+ summary = eval_studio_client.api.models.comparison_summary.Comparison summary(
165
+ recommendation_winner = '',
166
+ recommendation = '',
167
+ recommendation_confidence = '', ),
168
+ models_overview = eval_studio_client.api.models.models_overview.Models overview(
169
+ baseline_model_key = '',
170
+ current_model_key = '',
171
+ baseline_model_name = '',
172
+ baseline_collection_id = [
173
+ ''
174
+ ],
175
+ current_model_name = '',
176
+ current_collection_id = [
177
+ ''
178
+ ], ),
179
+ models_comparisons = eval_studio_client.api.models.models_comparison_statistics.Models comparison statistics(
180
+ test_case_ranks_baseline = 56,
181
+ test_case_ranks_current = 56,
182
+ test_case_wins_baseline = 56,
183
+ test_case_wins_current = 56, ),
184
+ models_comparisons_metrics = eval_studio_client.api.models.detailed_metrics_comparisons.Detailed metrics comparisons(
185
+ metrics_ranks_baseline = 1.337,
186
+ metrics_ranks_current = 1.337,
187
+ metrics_wins_baseline = 56,
188
+ metrics_wins_current = 56,
189
+ metrics_averages = [
190
+ eval_studio_client.api.models.metric_average_comparison.Metric average comparison(
191
+ metric_key = '',
192
+ baseline_avg = 1.337,
193
+ current_avg = 1.337,
194
+ diff = 1.337,
195
+ baseline_better_wins = 56,
196
+ current_better_wins = 56,
197
+ baseline_rank_avg = 1.337,
198
+ current_rank_avg = 1.337, )
199
+ ], ),
200
+ technical_metrics = eval_studio_client.api.models.technical_metrics_for_model_performance.Technical metrics for model performance(
201
+ baseline = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
202
+ cost_sum = 1.337,
203
+ duration_sum = 1.337,
204
+ duration_min = 1.337,
205
+ duration_max = 1.337,
206
+ duration_avg = 1.337, ),
207
+ current = eval_studio_client.api.models.technical_metrics_detail.Technical metrics detail(
208
+ cost_sum = 1.337,
209
+ duration_sum = 1.337,
210
+ duration_min = 1.337,
211
+ duration_max = 1.337,
212
+ duration_avg = 1.337, ), ),
213
+ test_cases_leaderboard = [
214
+ eval_studio_client.api.models.test_case_leaderboard_item.Test case leaderboard item(
215
+ wins = 56,
216
+ question = '',
217
+ changed_metrics_count = 56, )
218
+ ], )
219
+ ],
220
+ leaderboards = [
221
+ eval_studio_client.api.models.leaderboard_information.Leaderboard information(
222
+ key = '', )
223
+ ],
224
+ metrics_meta = {
225
+ 'key' : eval_studio_client.api.models.metric_metadata.Metric metadata(
226
+ key = '',
227
+ display_name = '',
228
+ data_type = '',
229
+ display_value = '',
230
+ description = '',
231
+ value_range = [
232
+ 1.337
233
+ ],
234
+ value_enum = [
235
+ ''
236
+ ],
237
+ higher_is_better = True,
238
+ threshold = 1.337,
239
+ is_primary_metric = True,
240
+ parent_metric = '',
241
+ exclude = True, )
242
+ }, ), )
243
+ )
244
+ else:
245
+ return V1CmpLeaderboardReportsResponse(
246
+ )
247
+ """
248
+
249
+ def testV1CmpLeaderboardReportsResponse(self):
250
+ """Test V1CmpLeaderboardReportsResponse"""
251
+ # inst_req_only = self.make_instance(include_optional=False)
252
+ # inst_req_and_optional = self.make_instance(include_optional=True)
253
+
254
+ if __name__ == '__main__':
255
+ unittest.main()
@@ -0,0 +1,233 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ ai/h2o/eval_studio/v1/insight.proto
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: version not set
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from eval_studio_client.api.models.v1_comparison_item import V1ComparisonItem
18
+
19
class TestV1ComparisonItem(unittest.TestCase):
    """V1ComparisonItem unit test stubs"""
    # NOTE(review): auto-generated by OpenAPI Generator — do not hand-edit the
    # template string below; it is meant to be uncommented wholesale when real
    # assertions are added.

    def setUp(self):
        # No fixtures needed for these generated stubs.
        pass

    def tearDown(self):
        # Nothing to clean up.
        pass

    def make_instance(self, include_optional) -> V1ComparisonItem:
        """Test V1ComparisonItem
            include_optional is a boolean, when False only required
            params are included, when True both required and
            optional params are included """
        # uncomment below to create an instance of `V1ComparisonItem`
        # (while commented out, this method implicitly returns None)
        """
        model = V1ComparisonItem()
        if include_optional:
            return V1ComparisonItem(
                question = '',
                diff_flipped_metrics = [
                    eval_studio_client.api.models.flipped_metric_information.Flipped metric information(
                        metric_name = '',
                        baseline_value = 1.337,
                        current_value = 1.337, )
                    ],
                baseline_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
                    key = '',
                    input = '',
                    corpus = [
                        ''
                        ],
                    context = [
                        ''
                        ],
                    categories = [
                        ''
                        ],
                    relationships = [
                        eval_studio_client.api.models.test_case_relationship_information.Test case relationship information(
                            type = '',
                            target = '',
                            target_type = '', )
                        ],
                    expected_output = '',
                    output_constraints = [
                        ''
                        ],
                    output_condition = '',
                    actual_output = '',
                    actual_duration = 1.337,
                    cost = 1.337,
                    model_key = '',
                    test_key = '',
                    test_case_key = '',
                    metrics = [
                        eval_studio_client.api.models.metric_information.Metric information(
                            key = '',
                            value = 1.337, )
                        ],
                    metrics_meta = {
                        'key' : ''
                        },
                    actual_output_meta = [
                        eval_studio_client.api.models.actual_output_metadata.Actual output metadata(
                            tokenization = '',
                            data = [
                                eval_studio_client.api.models.data_fragment.Data fragment(
                                    text = '',
                                    meta = {
                                        'key' : ''
                                        }, )
                                ], )
                        ],
                    metric_scores = [
                        eval_studio_client.api.models.comparison_metric_score_information_(specific_to_comparison_reports).Comparison metric score information (specific to comparison reports)(
                            metric_name = '',
                            metric_score = 1.337, )
                        ],
                    result_error_message = '', ),
                baseline_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
                    sentences = [
                        ''
                        ],
                    sentences_count = 56,
                    common_sentences = [
                        ''
                        ],
                    common_count = 56,
                    unique_sentences = [
                        ''
                        ],
                    unique_count = 56,
                    identical = True,
                    sentence_similarity = {
                        'key' : 1.337
                        }, ),
                baseline_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
                    chunks = [
                        ''
                        ],
                    chunks_count = 56,
                    common_chunks = [
                        ''
                        ],
                    common_count = 56,
                    unique_chunks = [
                        ''
                        ],
                    unique_count = 56,
                    identical = True,
                    chunk_similarity = {
                        'key' : 1.337
                        }, ),
                current_test_case_result = eval_studio_client.api.models.test_case_result.Test case result(
                    key = '',
                    input = '',
                    corpus = [
                        ''
                        ],
                    context = [
                        ''
                        ],
                    categories = [
                        ''
                        ],
                    relationships = [
                        eval_studio_client.api.models.test_case_relationship_information.Test case relationship information(
                            type = '',
                            target = '',
                            target_type = '', )
                        ],
                    expected_output = '',
                    output_constraints = [
                        ''
                        ],
                    output_condition = '',
                    actual_output = '',
                    actual_duration = 1.337,
                    cost = 1.337,
                    model_key = '',
                    test_key = '',
                    test_case_key = '',
                    metrics = [
                        eval_studio_client.api.models.metric_information.Metric information(
                            key = '',
                            value = 1.337, )
                        ],
                    metrics_meta = {
                        'key' : ''
                        },
                    actual_output_meta = [
                        eval_studio_client.api.models.actual_output_metadata.Actual output metadata(
                            tokenization = '',
                            data = [
                                eval_studio_client.api.models.data_fragment.Data fragment(
                                    text = '',
                                    meta = {
                                        'key' : ''
                                        }, )
                                ], )
                        ],
                    metric_scores = [
                        eval_studio_client.api.models.comparison_metric_score_information_(specific_to_comparison_reports).Comparison metric score information (specific to comparison reports)(
                            metric_name = '',
                            metric_score = 1.337, )
                        ],
                    result_error_message = '', ),
                current_diff_actual_output_meta = eval_studio_client.api.models.actual_output_metadata_diff.Actual output metadata diff(
                    sentences = [
                        ''
                        ],
                    sentences_count = 56,
                    common_sentences = [
                        ''
                        ],
                    common_count = 56,
                    unique_sentences = [
                        ''
                        ],
                    unique_count = 56,
                    identical = True,
                    sentence_similarity = {
                        'key' : 1.337
                        }, ),
                current_diff_retrieved_context = eval_studio_client.api.models.retrieved_context_diff.Retrieved context diff(
                    chunks = [
                        ''
                        ],
                    chunks_count = 56,
                    common_chunks = [
                        ''
                        ],
                    common_count = 56,
                    unique_chunks = [
                        ''
                        ],
                    unique_count = 56,
                    identical = True,
                    chunk_similarity = {
                        'key' : 1.337
                        }, )
            )
        else:
            return V1ComparisonItem(
        )
        """

    def testV1ComparisonItem(self):
        """Test V1ComparisonItem"""
        # Generated placeholder: uncomment once `make_instance` is enabled to
        # exercise both the required-only and fully-populated constructor paths.
        # inst_req_only = self.make_instance(include_optional=False)
        # inst_req_and_optional = self.make_instance(include_optional=True)
231
+
232
# Allow running this generated test module directly with the stdlib runner.
if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,52 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ ai/h2o/eval_studio/v1/insight.proto
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: version not set
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from eval_studio_client.api.models.v1_comparison_metric_score import V1ComparisonMetricScore
18
+
19
class TestV1ComparisonMetricScore(unittest.TestCase):
    """V1ComparisonMetricScore unit test stubs"""
    # NOTE(review): auto-generated by OpenAPI Generator — do not hand-edit the
    # template string below; it is meant to be uncommented wholesale when real
    # assertions are added.

    def setUp(self):
        # No fixtures needed for these generated stubs.
        pass

    def tearDown(self):
        # Nothing to clean up.
        pass

    def make_instance(self, include_optional) -> V1ComparisonMetricScore:
        """Test V1ComparisonMetricScore
            include_optional is a boolean, when False only required
            params are included, when True both required and
            optional params are included """
        # uncomment below to create an instance of `V1ComparisonMetricScore`
        # (while commented out, this method implicitly returns None)
        """
        model = V1ComparisonMetricScore()
        if include_optional:
            return V1ComparisonMetricScore(
                metric_name = '',
                metric_score = 1.337
            )
        else:
            return V1ComparisonMetricScore(
        )
        """

    def testV1ComparisonMetricScore(self):
        """Test V1ComparisonMetricScore"""
        # Generated placeholder: uncomment once `make_instance` is enabled to
        # exercise both the required-only and fully-populated constructor paths.
        # inst_req_only = self.make_instance(include_optional=False)
        # inst_req_and_optional = self.make_instance(include_optional=True)
50
+
51
# Allow running this generated test module directly with the stdlib runner.
if __name__ == '__main__':
    unittest.main()