eval-studio-client 1.0.0a1__py3-none-any.whl → 1.1.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575)
  1. eval_studio_client/api/__init__.py +79 -1
  2. eval_studio_client/api/api/__init__.py +9 -0
  3. eval_studio_client/api/api/adversarial_inputs_service_api.py +321 -0
  4. eval_studio_client/api/api/dashboard_service_api.py +1 -1
  5. eval_studio_client/api/api/document_service_api.py +1 -1
  6. eval_studio_client/api/api/evaluation_service_api.py +1 -1
  7. eval_studio_client/api/api/evaluator_service_api.py +1 -1
  8. eval_studio_client/api/api/generated_questions_validation_service_api.py +321 -0
  9. eval_studio_client/api/api/human_calibration_service_api.py +304 -0
  10. eval_studio_client/api/api/info_service_api.py +1 -1
  11. eval_studio_client/api/api/leaderboard_report_service_api.py +292 -0
  12. eval_studio_client/api/api/leaderboard_service_api.py +17 -17
  13. eval_studio_client/api/api/model_service_api.py +17 -17
  14. eval_studio_client/api/api/operation_progress_service_api.py +1 -1
  15. eval_studio_client/api/api/operation_service_api.py +272 -17
  16. eval_studio_client/api/api/perturbation_service_api.py +1 -1
  17. eval_studio_client/api/api/perturbator_service_api.py +285 -18
  18. eval_studio_client/api/api/prompt_generation_service_api.py +1 -1
  19. eval_studio_client/api/api/prompt_library_service_api.py +669 -0
  20. eval_studio_client/api/api/test_case_relationship_service_api.py +292 -0
  21. eval_studio_client/api/api/test_case_service_api.py +17 -17
  22. eval_studio_client/api/api/test_class_service_api.py +17 -17
  23. eval_studio_client/api/api/test_lab_service_api.py +1 -1
  24. eval_studio_client/api/api/test_service_api.py +1238 -102
  25. eval_studio_client/api/api/who_am_i_service_api.py +1 -1
  26. eval_studio_client/api/api/workflow_edge_service_api.py +835 -0
  27. eval_studio_client/api/api/workflow_node_service_api.py +2431 -0
  28. eval_studio_client/api/api/workflow_service_api.py +1893 -0
  29. eval_studio_client/api/api_client.py +1 -1
  30. eval_studio_client/api/configuration.py +1 -1
  31. eval_studio_client/api/docs/AdversarialInputsServiceApi.md +78 -0
  32. eval_studio_client/api/docs/AdversarialInputsServiceTestAdversarialInputsRobustnessRequest.md +45 -0
  33. eval_studio_client/api/docs/GeneratedQuestionsValidationServiceApi.md +78 -0
  34. eval_studio_client/api/docs/GeneratedQuestionsValidationServiceValidateGeneratedQuestionsRequest.md +30 -0
  35. eval_studio_client/api/docs/HumanCalibrationServiceApi.md +77 -0
  36. eval_studio_client/api/docs/LeaderboardReportServiceApi.md +75 -0
  37. eval_studio_client/api/docs/LeaderboardServiceApi.md +5 -5
  38. eval_studio_client/api/docs/ModelServiceApi.md +5 -5
  39. eval_studio_client/api/docs/OperationServiceApi.md +72 -5
  40. eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +1 -0
  41. eval_studio_client/api/docs/PerturbatorServiceApi.md +38 -8
  42. eval_studio_client/api/docs/PromptGenerationServiceAutoGeneratePromptsRequest.md +4 -2
  43. eval_studio_client/api/docs/PromptLibraryServiceApi.md +155 -0
  44. eval_studio_client/api/docs/ProtobufNullValue.md +12 -0
  45. eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +3 -0
  46. eval_studio_client/api/docs/RequiredTheUpdatedWorkflow.md +47 -0
  47. eval_studio_client/api/docs/RequiredTheUpdatedWorkflowNode.md +44 -0
  48. eval_studio_client/api/docs/TestCaseRelationshipServiceApi.md +75 -0
  49. eval_studio_client/api/docs/TestCaseServiceApi.md +5 -5
  50. eval_studio_client/api/docs/TestClassServiceApi.md +5 -5
  51. eval_studio_client/api/docs/TestServiceApi.md +285 -5
  52. eval_studio_client/api/docs/TestServiceCloneTestRequest.md +30 -0
  53. eval_studio_client/api/docs/TestServiceGenerateTestCasesRequest.md +3 -1
  54. eval_studio_client/api/docs/TestServiceImportTestCasesFromLibraryRequest.md +32 -0
  55. eval_studio_client/api/docs/TestServiceListTestCaseLibraryItemsRequest.md +35 -0
  56. eval_studio_client/api/docs/TestServicePerturbTestInPlaceRequest.md +30 -0
  57. eval_studio_client/api/docs/TestServicePerturbTestRequest.md +1 -0
  58. eval_studio_client/api/docs/V1AbortOperationResponse.md +29 -0
  59. eval_studio_client/api/docs/V1BatchDeleteWorkflowsRequest.md +29 -0
  60. eval_studio_client/api/docs/V1BatchDeleteWorkflowsResponse.md +29 -0
  61. eval_studio_client/api/docs/V1BatchGetWorkflowEdgesResponse.md +29 -0
  62. eval_studio_client/api/docs/V1BatchGetWorkflowNodesResponse.md +29 -0
  63. eval_studio_client/api/docs/V1CloneTestResponse.md +29 -0
  64. eval_studio_client/api/docs/V1CloneWorkflowResponse.md +29 -0
  65. eval_studio_client/api/docs/V1Context.md +32 -0
  66. eval_studio_client/api/docs/V1CreateEvaluationRequest.md +1 -0
  67. eval_studio_client/api/docs/V1CreateWorkflowEdgeResponse.md +29 -0
  68. eval_studio_client/api/docs/V1CreateWorkflowNodeResponse.md +29 -0
  69. eval_studio_client/api/docs/V1CreateWorkflowResponse.md +29 -0
  70. eval_studio_client/api/docs/V1DeleteWorkflowEdgeResponse.md +29 -0
  71. eval_studio_client/api/docs/V1DeleteWorkflowNodeResponse.md +29 -0
  72. eval_studio_client/api/docs/V1DeleteWorkflowResponse.md +29 -0
  73. eval_studio_client/api/docs/V1EstimateThresholdRequest.md +33 -0
  74. eval_studio_client/api/docs/V1GeneratedTestCase.md +30 -0
  75. eval_studio_client/api/docs/V1GetLeaderboardReportResponse.md +29 -0
  76. eval_studio_client/api/docs/V1GetWorkflowNodePrerequisitesResponse.md +30 -0
  77. eval_studio_client/api/docs/V1GetWorkflowNodeResponse.md +29 -0
  78. eval_studio_client/api/docs/V1GetWorkflowResponse.md +29 -0
  79. eval_studio_client/api/docs/V1ImportEvaluationRequest.md +1 -0
  80. eval_studio_client/api/docs/V1ImportTestCasesFromLibraryResponse.md +29 -0
  81. eval_studio_client/api/docs/V1ImportTestCasesRequest.md +33 -0
  82. eval_studio_client/api/docs/V1Info.md +3 -0
  83. eval_studio_client/api/docs/V1InitWorkflowNodeResponse.md +29 -0
  84. eval_studio_client/api/docs/V1LabeledTestCase.md +31 -0
  85. eval_studio_client/api/docs/V1LeaderboardReport.md +32 -0
  86. eval_studio_client/api/docs/V1LeaderboardReportActualOutputData.md +31 -0
  87. eval_studio_client/api/docs/V1LeaderboardReportActualOutputMeta.md +31 -0
  88. eval_studio_client/api/docs/V1LeaderboardReportEvaluator.md +42 -0
  89. eval_studio_client/api/docs/V1LeaderboardReportEvaluatorParameter.md +38 -0
  90. eval_studio_client/api/docs/V1LeaderboardReportExplanation.md +34 -0
  91. eval_studio_client/api/docs/V1LeaderboardReportMetricsMetaEntry.md +41 -0
  92. eval_studio_client/api/docs/V1LeaderboardReportModel.md +39 -0
  93. eval_studio_client/api/docs/V1LeaderboardReportResult.md +45 -0
  94. eval_studio_client/api/docs/V1LeaderboardReportResultRelationship.md +32 -0
  95. eval_studio_client/api/docs/V1ListPromptLibraryItemsResponse.md +29 -0
  96. eval_studio_client/api/docs/V1ListTestCaseLibraryItemsResponse.md +29 -0
  97. eval_studio_client/api/docs/V1ListTestCaseRelationshipsResponse.md +29 -0
  98. eval_studio_client/api/docs/V1ListWorkflowsResponse.md +29 -0
  99. eval_studio_client/api/docs/V1MetricScore.md +31 -0
  100. eval_studio_client/api/docs/V1MetricScores.md +29 -0
  101. eval_studio_client/api/docs/V1PerturbTestInPlaceResponse.md +29 -0
  102. eval_studio_client/api/docs/V1ProcessWorkflowNodeResponse.md +29 -0
  103. eval_studio_client/api/docs/V1PromptLibraryItem.md +42 -0
  104. eval_studio_client/api/docs/V1RepeatedString.md +29 -0
  105. eval_studio_client/api/docs/V1ResetWorkflowNodeResponse.md +29 -0
  106. eval_studio_client/api/docs/V1TestCase.md +3 -0
  107. eval_studio_client/api/docs/V1TestSuiteEvaluates.md +11 -0
  108. eval_studio_client/api/docs/V1UpdateWorkflowNodeResponse.md +29 -0
  109. eval_studio_client/api/docs/V1UpdateWorkflowResponse.md +29 -0
  110. eval_studio_client/api/docs/V1Workflow.md +49 -0
  111. eval_studio_client/api/docs/V1WorkflowEdge.md +40 -0
  112. eval_studio_client/api/docs/V1WorkflowEdgeType.md +12 -0
  113. eval_studio_client/api/docs/V1WorkflowNode.md +46 -0
  114. eval_studio_client/api/docs/V1WorkflowNodeArtifact.md +40 -0
  115. eval_studio_client/api/docs/V1WorkflowNodeArtifacts.md +29 -0
  116. eval_studio_client/api/docs/V1WorkflowNodeAttributes.md +30 -0
  117. eval_studio_client/api/docs/V1WorkflowNodeStatus.md +12 -0
  118. eval_studio_client/api/docs/V1WorkflowNodeType.md +12 -0
  119. eval_studio_client/api/docs/V1WorkflowNodeView.md +12 -0
  120. eval_studio_client/api/docs/V1WorkflowType.md +12 -0
  121. eval_studio_client/api/docs/WorkflowEdgeServiceApi.md +215 -0
  122. eval_studio_client/api/docs/WorkflowNodeServiceApi.md +632 -0
  123. eval_studio_client/api/docs/WorkflowServiceApi.md +488 -0
  124. eval_studio_client/api/docs/WorkflowServiceCloneWorkflowRequest.md +33 -0
  125. eval_studio_client/api/exceptions.py +1 -1
  126. eval_studio_client/api/models/__init__.py +70 -1
  127. eval_studio_client/api/models/adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +143 -0
  128. eval_studio_client/api/models/generated_questions_validation_service_validate_generated_questions_request.py +97 -0
  129. eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +9 -3
  130. eval_studio_client/api/models/prompt_generation_service_auto_generate_prompts_request.py +17 -6
  131. eval_studio_client/api/models/protobuf_any.py +1 -1
  132. eval_studio_client/api/models/protobuf_null_value.py +36 -0
  133. eval_studio_client/api/models/required_the_dashboard_to_update.py +1 -1
  134. eval_studio_client/api/models/required_the_document_to_update.py +1 -1
  135. eval_studio_client/api/models/required_the_leaderboard_to_update.py +1 -1
  136. eval_studio_client/api/models/required_the_model_to_update.py +1 -1
  137. eval_studio_client/api/models/required_the_operation_to_finalize.py +1 -1
  138. eval_studio_client/api/models/required_the_operation_to_update.py +1 -1
  139. eval_studio_client/api/models/required_the_test_case_to_update.py +14 -3
  140. eval_studio_client/api/models/required_the_test_to_update.py +1 -1
  141. eval_studio_client/api/models/required_the_updated_workflow.py +160 -0
  142. eval_studio_client/api/models/required_the_updated_workflow_node.py +152 -0
  143. eval_studio_client/api/models/rpc_status.py +1 -1
  144. eval_studio_client/api/models/test_case_service_batch_delete_test_cases_request.py +1 -1
  145. eval_studio_client/api/models/test_service_clone_test_request.py +89 -0
  146. eval_studio_client/api/models/test_service_generate_test_cases_request.py +8 -4
  147. eval_studio_client/api/models/test_service_import_test_cases_from_library_request.py +93 -0
  148. eval_studio_client/api/models/test_service_list_test_case_library_items_request.py +99 -0
  149. eval_studio_client/api/models/test_service_perturb_test_in_place_request.py +97 -0
  150. eval_studio_client/api/models/test_service_perturb_test_request.py +5 -3
  151. eval_studio_client/api/models/v1_abort_operation_response.py +91 -0
  152. eval_studio_client/api/models/v1_batch_create_leaderboards_request.py +1 -1
  153. eval_studio_client/api/models/v1_batch_create_leaderboards_response.py +1 -1
  154. eval_studio_client/api/models/v1_batch_delete_dashboards_request.py +1 -1
  155. eval_studio_client/api/models/v1_batch_delete_dashboards_response.py +1 -1
  156. eval_studio_client/api/models/v1_batch_delete_documents_request.py +1 -1
  157. eval_studio_client/api/models/v1_batch_delete_documents_response.py +1 -1
  158. eval_studio_client/api/models/v1_batch_delete_evaluators_request.py +1 -1
  159. eval_studio_client/api/models/v1_batch_delete_evaluators_response.py +1 -1
  160. eval_studio_client/api/models/v1_batch_delete_leaderboards_request.py +1 -1
  161. eval_studio_client/api/models/v1_batch_delete_leaderboards_response.py +1 -1
  162. eval_studio_client/api/models/v1_batch_delete_models_request.py +1 -1
  163. eval_studio_client/api/models/v1_batch_delete_models_response.py +1 -1
  164. eval_studio_client/api/models/v1_batch_delete_test_cases_response.py +1 -1
  165. eval_studio_client/api/models/v1_batch_delete_tests_request.py +1 -1
  166. eval_studio_client/api/models/v1_batch_delete_tests_response.py +1 -1
  167. eval_studio_client/api/models/v1_batch_delete_workflows_request.py +87 -0
  168. eval_studio_client/api/models/v1_batch_delete_workflows_response.py +95 -0
  169. eval_studio_client/api/models/v1_batch_get_dashboards_response.py +1 -1
  170. eval_studio_client/api/models/v1_batch_get_documents_response.py +1 -1
  171. eval_studio_client/api/models/v1_batch_get_leaderboards_response.py +1 -1
  172. eval_studio_client/api/models/v1_batch_get_models_response.py +1 -1
  173. eval_studio_client/api/models/v1_batch_get_operations_response.py +1 -1
  174. eval_studio_client/api/models/v1_batch_get_tests_response.py +1 -1
  175. eval_studio_client/api/models/v1_batch_get_workflow_edges_response.py +95 -0
  176. eval_studio_client/api/models/v1_batch_get_workflow_nodes_response.py +95 -0
  177. eval_studio_client/api/models/v1_batch_import_leaderboard_request.py +1 -1
  178. eval_studio_client/api/models/v1_batch_import_leaderboard_response.py +1 -1
  179. eval_studio_client/api/models/v1_batch_import_tests_request.py +1 -1
  180. eval_studio_client/api/models/v1_batch_import_tests_response.py +1 -1
  181. eval_studio_client/api/models/v1_check_base_models_response.py +1 -1
  182. eval_studio_client/api/models/v1_clone_test_response.py +91 -0
  183. eval_studio_client/api/models/v1_clone_workflow_response.py +91 -0
  184. eval_studio_client/api/models/v1_collection_info.py +1 -1
  185. eval_studio_client/api/models/v1_context.py +93 -0
  186. eval_studio_client/api/models/v1_create_dashboard_response.py +1 -1
  187. eval_studio_client/api/models/v1_create_document_response.py +1 -1
  188. eval_studio_client/api/models/v1_create_evaluation_request.py +8 -3
  189. eval_studio_client/api/models/v1_create_evaluator_response.py +1 -1
  190. eval_studio_client/api/models/v1_create_leaderboard_request.py +1 -1
  191. eval_studio_client/api/models/v1_create_leaderboard_response.py +1 -1
  192. eval_studio_client/api/models/v1_create_leaderboard_without_cache_response.py +1 -1
  193. eval_studio_client/api/models/v1_create_model_response.py +1 -1
  194. eval_studio_client/api/models/v1_create_perturbation_response.py +1 -1
  195. eval_studio_client/api/models/v1_create_test_case_response.py +1 -1
  196. eval_studio_client/api/models/v1_create_test_lab_response.py +1 -1
  197. eval_studio_client/api/models/v1_create_test_response.py +1 -1
  198. eval_studio_client/api/models/v1_create_workflow_edge_response.py +91 -0
  199. eval_studio_client/api/models/v1_create_workflow_node_response.py +91 -0
  200. eval_studio_client/api/models/v1_create_workflow_response.py +91 -0
  201. eval_studio_client/api/models/v1_dashboard.py +1 -1
  202. eval_studio_client/api/models/v1_dashboard_status.py +1 -1
  203. eval_studio_client/api/models/v1_delete_dashboard_response.py +1 -1
  204. eval_studio_client/api/models/v1_delete_document_response.py +1 -1
  205. eval_studio_client/api/models/v1_delete_evaluator_response.py +1 -1
  206. eval_studio_client/api/models/v1_delete_leaderboard_response.py +1 -1
  207. eval_studio_client/api/models/v1_delete_model_response.py +1 -1
  208. eval_studio_client/api/models/v1_delete_test_case_response.py +1 -1
  209. eval_studio_client/api/models/v1_delete_test_response.py +1 -1
  210. eval_studio_client/api/models/v1_delete_workflow_edge_response.py +91 -0
  211. eval_studio_client/api/models/v1_delete_workflow_node_response.py +91 -0
  212. eval_studio_client/api/models/v1_delete_workflow_response.py +91 -0
  213. eval_studio_client/api/models/v1_document.py +1 -1
  214. eval_studio_client/api/models/v1_estimate_threshold_request.py +103 -0
  215. eval_studio_client/api/models/v1_evaluation_test.py +1 -1
  216. eval_studio_client/api/models/v1_evaluator.py +1 -1
  217. eval_studio_client/api/models/v1_evaluator_param_type.py +1 -1
  218. eval_studio_client/api/models/v1_evaluator_parameter.py +1 -1
  219. eval_studio_client/api/models/v1_evaluator_view.py +1 -1
  220. eval_studio_client/api/models/v1_finalize_operation_response.py +1 -1
  221. eval_studio_client/api/models/v1_find_all_test_cases_by_id_response.py +1 -1
  222. eval_studio_client/api/models/v1_find_test_lab_response.py +1 -1
  223. eval_studio_client/api/models/v1_generate_test_cases_response.py +1 -1
  224. eval_studio_client/api/models/v1_generated_test_case.py +101 -0
  225. eval_studio_client/api/models/v1_get_dashboard_response.py +1 -1
  226. eval_studio_client/api/models/v1_get_document_response.py +1 -1
  227. eval_studio_client/api/models/v1_get_evaluator_response.py +1 -1
  228. eval_studio_client/api/models/v1_get_info_response.py +1 -1
  229. eval_studio_client/api/models/v1_get_leaderboard_report_response.py +91 -0
  230. eval_studio_client/api/models/v1_get_leaderboard_response.py +1 -1
  231. eval_studio_client/api/models/v1_get_model_response.py +1 -1
  232. eval_studio_client/api/models/v1_get_operation_progress_by_parent_response.py +1 -1
  233. eval_studio_client/api/models/v1_get_operation_response.py +1 -1
  234. eval_studio_client/api/models/v1_get_perturbator_response.py +1 -1
  235. eval_studio_client/api/models/v1_get_test_case_response.py +1 -1
  236. eval_studio_client/api/models/v1_get_test_class_response.py +1 -1
  237. eval_studio_client/api/models/v1_get_test_response.py +1 -1
  238. eval_studio_client/api/models/v1_get_workflow_node_prerequisites_response.py +89 -0
  239. eval_studio_client/api/models/v1_get_workflow_node_response.py +91 -0
  240. eval_studio_client/api/models/v1_get_workflow_response.py +91 -0
  241. eval_studio_client/api/models/v1_import_evaluation_request.py +8 -3
  242. eval_studio_client/api/models/v1_import_leaderboard_request.py +1 -1
  243. eval_studio_client/api/models/v1_import_leaderboard_response.py +1 -1
  244. eval_studio_client/api/models/v1_import_test_cases_from_library_response.py +91 -0
  245. eval_studio_client/api/models/v1_import_test_cases_request.py +95 -0
  246. eval_studio_client/api/models/v1_info.py +10 -4
  247. eval_studio_client/api/models/v1_init_workflow_node_response.py +91 -0
  248. eval_studio_client/api/models/v1_insight.py +1 -1
  249. eval_studio_client/api/models/v1_labeled_test_case.py +91 -0
  250. eval_studio_client/api/models/v1_leaderboard.py +1 -1
  251. eval_studio_client/api/models/v1_leaderboard_report.py +115 -0
  252. eval_studio_client/api/models/v1_leaderboard_report_actual_output_data.py +93 -0
  253. eval_studio_client/api/models/v1_leaderboard_report_actual_output_meta.py +101 -0
  254. eval_studio_client/api/models/v1_leaderboard_report_evaluator.py +155 -0
  255. eval_studio_client/api/models/v1_leaderboard_report_evaluator_parameter.py +109 -0
  256. eval_studio_client/api/models/v1_leaderboard_report_explanation.py +103 -0
  257. eval_studio_client/api/models/v1_leaderboard_report_metrics_meta_entry.py +129 -0
  258. eval_studio_client/api/models/v1_leaderboard_report_model.py +121 -0
  259. eval_studio_client/api/models/v1_leaderboard_report_result.py +175 -0
  260. eval_studio_client/api/models/v1_leaderboard_report_result_relationship.py +97 -0
  261. eval_studio_client/api/models/v1_leaderboard_status.py +1 -1
  262. eval_studio_client/api/models/v1_leaderboard_type.py +1 -1
  263. eval_studio_client/api/models/v1_leaderboard_view.py +1 -1
  264. eval_studio_client/api/models/v1_list_base_models_response.py +1 -1
  265. eval_studio_client/api/models/v1_list_dashboards_response.py +1 -1
  266. eval_studio_client/api/models/v1_list_documents_response.py +1 -1
  267. eval_studio_client/api/models/v1_list_evaluators_response.py +1 -1
  268. eval_studio_client/api/models/v1_list_leaderboards_response.py +1 -1
  269. eval_studio_client/api/models/v1_list_llm_models_response.py +1 -1
  270. eval_studio_client/api/models/v1_list_model_collections_response.py +1 -1
  271. eval_studio_client/api/models/v1_list_models_response.py +1 -1
  272. eval_studio_client/api/models/v1_list_most_recent_dashboards_response.py +1 -1
  273. eval_studio_client/api/models/v1_list_most_recent_leaderboards_response.py +1 -1
  274. eval_studio_client/api/models/v1_list_most_recent_models_response.py +1 -1
  275. eval_studio_client/api/models/v1_list_most_recent_tests_response.py +1 -1
  276. eval_studio_client/api/models/v1_list_operations_response.py +1 -1
  277. eval_studio_client/api/models/v1_list_perturbators_response.py +1 -1
  278. eval_studio_client/api/models/v1_list_prompt_library_items_response.py +95 -0
  279. eval_studio_client/api/models/v1_list_rag_collections_response.py +1 -1
  280. eval_studio_client/api/models/v1_list_test_case_library_items_response.py +95 -0
  281. eval_studio_client/api/models/v1_list_test_case_relationships_response.py +95 -0
  282. eval_studio_client/api/models/v1_list_test_cases_response.py +1 -1
  283. eval_studio_client/api/models/v1_list_test_classes_response.py +1 -1
  284. eval_studio_client/api/models/v1_list_tests_response.py +1 -1
  285. eval_studio_client/api/models/v1_list_workflows_response.py +95 -0
  286. eval_studio_client/api/models/v1_metric_score.py +89 -0
  287. eval_studio_client/api/models/v1_metric_scores.py +95 -0
  288. eval_studio_client/api/models/v1_model.py +1 -1
  289. eval_studio_client/api/models/v1_model_type.py +1 -1
  290. eval_studio_client/api/models/v1_operation.py +1 -1
  291. eval_studio_client/api/models/v1_operation_progress.py +1 -1
  292. eval_studio_client/api/models/v1_perturb_test_in_place_response.py +91 -0
  293. eval_studio_client/api/models/v1_perturb_test_response.py +1 -1
  294. eval_studio_client/api/models/v1_perturbator.py +1 -1
  295. eval_studio_client/api/models/v1_perturbator_configuration.py +1 -1
  296. eval_studio_client/api/models/v1_perturbator_intensity.py +1 -1
  297. eval_studio_client/api/models/v1_problem_and_action.py +1 -1
  298. eval_studio_client/api/models/v1_process_workflow_node_response.py +91 -0
  299. eval_studio_client/api/models/v1_prompt_library_item.py +129 -0
  300. eval_studio_client/api/models/v1_repeated_string.py +87 -0
  301. eval_studio_client/api/models/v1_reset_workflow_node_response.py +91 -0
  302. eval_studio_client/api/models/v1_test.py +1 -1
  303. eval_studio_client/api/models/v1_test_case.py +14 -3
  304. eval_studio_client/api/models/v1_test_case_relationship.py +1 -1
  305. eval_studio_client/api/models/v1_test_cases_generator.py +1 -1
  306. eval_studio_client/api/models/v1_test_class.py +1 -1
  307. eval_studio_client/api/models/v1_test_class_type.py +1 -1
  308. eval_studio_client/api/models/v1_test_lab.py +1 -1
  309. eval_studio_client/api/models/v1_test_suite_evaluates.py +39 -0
  310. eval_studio_client/api/models/v1_update_dashboard_response.py +1 -1
  311. eval_studio_client/api/models/v1_update_document_response.py +1 -1
  312. eval_studio_client/api/models/v1_update_leaderboard_response.py +1 -1
  313. eval_studio_client/api/models/v1_update_model_response.py +1 -1
  314. eval_studio_client/api/models/v1_update_operation_response.py +1 -1
  315. eval_studio_client/api/models/v1_update_test_case_response.py +1 -1
  316. eval_studio_client/api/models/v1_update_test_response.py +1 -1
  317. eval_studio_client/api/models/v1_update_workflow_node_response.py +91 -0
  318. eval_studio_client/api/models/v1_update_workflow_response.py +91 -0
  319. eval_studio_client/api/models/v1_who_am_i_response.py +1 -1
  320. eval_studio_client/api/models/v1_workflow.py +164 -0
  321. eval_studio_client/api/models/v1_workflow_edge.py +123 -0
  322. eval_studio_client/api/models/v1_workflow_edge_type.py +37 -0
  323. eval_studio_client/api/models/v1_workflow_node.py +156 -0
  324. eval_studio_client/api/models/v1_workflow_node_artifact.py +122 -0
  325. eval_studio_client/api/models/v1_workflow_node_artifacts.py +97 -0
  326. eval_studio_client/api/models/v1_workflow_node_attributes.py +87 -0
  327. eval_studio_client/api/models/v1_workflow_node_status.py +40 -0
  328. eval_studio_client/api/models/v1_workflow_node_type.py +44 -0
  329. eval_studio_client/api/models/v1_workflow_node_view.py +38 -0
  330. eval_studio_client/api/models/v1_workflow_type.py +37 -0
  331. eval_studio_client/api/models/workflow_service_clone_workflow_request.py +95 -0
  332. eval_studio_client/api/rest.py +1 -1
  333. eval_studio_client/api/test/test_adversarial_inputs_service_api.py +37 -0
  334. eval_studio_client/api/test/test_adversarial_inputs_service_test_adversarial_inputs_robustness_request.py +128 -0
  335. eval_studio_client/api/test/test_dashboard_service_api.py +1 -1
  336. eval_studio_client/api/test/test_document_service_api.py +1 -1
  337. eval_studio_client/api/test/test_evaluation_service_api.py +1 -1
  338. eval_studio_client/api/test/test_evaluator_service_api.py +1 -1
  339. eval_studio_client/api/test/test_generated_questions_validation_service_api.py +37 -0
  340. eval_studio_client/api/test/test_generated_questions_validation_service_validate_generated_questions_request.py +83 -0
  341. eval_studio_client/api/test/test_human_calibration_service_api.py +38 -0
  342. eval_studio_client/api/test/test_info_service_api.py +1 -1
  343. eval_studio_client/api/test/test_leaderboard_report_service_api.py +37 -0
  344. eval_studio_client/api/test/test_leaderboard_service_api.py +1 -1
  345. eval_studio_client/api/test/test_model_service_api.py +1 -1
  346. eval_studio_client/api/test/test_operation_progress_service_api.py +1 -1
  347. eval_studio_client/api/test/test_operation_service_api.py +7 -1
  348. eval_studio_client/api/test/test_perturbation_service_api.py +1 -1
  349. eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +25 -3
  350. eval_studio_client/api/test/test_perturbator_service_api.py +1 -1
  351. eval_studio_client/api/test/test_prompt_generation_service_api.py +1 -1
  352. eval_studio_client/api/test/test_prompt_generation_service_auto_generate_prompts_request.py +13 -5
  353. eval_studio_client/api/test/test_prompt_library_service_api.py +43 -0
  354. eval_studio_client/api/test/test_protobuf_any.py +1 -1
  355. eval_studio_client/api/test/test_protobuf_null_value.py +33 -0
  356. eval_studio_client/api/test/test_required_the_dashboard_to_update.py +1 -1
  357. eval_studio_client/api/test/test_required_the_document_to_update.py +1 -1
  358. eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +1 -1
  359. eval_studio_client/api/test/test_required_the_model_to_update.py +1 -1
  360. eval_studio_client/api/test/test_required_the_operation_to_finalize.py +1 -1
  361. eval_studio_client/api/test/test_required_the_operation_to_update.py +1 -1
  362. eval_studio_client/api/test/test_required_the_test_case_to_update.py +9 -2
  363. eval_studio_client/api/test/test_required_the_test_to_update.py +1 -1
  364. eval_studio_client/api/test/test_required_the_updated_workflow.py +91 -0
  365. eval_studio_client/api/test/test_required_the_updated_workflow_node.py +80 -0
  366. eval_studio_client/api/test/test_rpc_status.py +1 -1
  367. eval_studio_client/api/test/test_test_case_relationship_service_api.py +37 -0
  368. eval_studio_client/api/test/test_test_case_service_api.py +1 -1
  369. eval_studio_client/api/test/test_test_case_service_batch_delete_test_cases_request.py +1 -1
  370. eval_studio_client/api/test/test_test_class_service_api.py +1 -1
  371. eval_studio_client/api/test/test_test_lab_service_api.py +1 -1
  372. eval_studio_client/api/test/test_test_service_api.py +25 -1
  373. eval_studio_client/api/test/test_test_service_clone_test_request.py +52 -0
  374. eval_studio_client/api/test/test_test_service_generate_test_cases_request.py +8 -2
  375. eval_studio_client/api/test/test_test_service_import_test_cases_from_library_request.py +56 -0
  376. eval_studio_client/api/test/test_test_service_list_test_case_library_items_request.py +63 -0
  377. eval_studio_client/api/test/test_test_service_perturb_test_in_place_request.py +59 -0
  378. eval_studio_client/api/test/test_test_service_perturb_test_request.py +5 -2
  379. eval_studio_client/api/test/test_v1_abort_operation_response.py +71 -0
  380. eval_studio_client/api/test/test_v1_batch_create_leaderboards_request.py +1 -1
  381. eval_studio_client/api/test/test_v1_batch_create_leaderboards_response.py +1 -1
  382. eval_studio_client/api/test/test_v1_batch_delete_dashboards_request.py +1 -1
  383. eval_studio_client/api/test/test_v1_batch_delete_dashboards_response.py +1 -1
  384. eval_studio_client/api/test/test_v1_batch_delete_documents_request.py +1 -1
  385. eval_studio_client/api/test/test_v1_batch_delete_documents_response.py +1 -1
  386. eval_studio_client/api/test/test_v1_batch_delete_evaluators_request.py +1 -1
  387. eval_studio_client/api/test/test_v1_batch_delete_evaluators_response.py +1 -1
  388. eval_studio_client/api/test/test_v1_batch_delete_leaderboards_request.py +1 -1
  389. eval_studio_client/api/test/test_v1_batch_delete_leaderboards_response.py +1 -1
  390. eval_studio_client/api/test/test_v1_batch_delete_models_request.py +1 -1
  391. eval_studio_client/api/test/test_v1_batch_delete_models_response.py +1 -1
  392. eval_studio_client/api/test/test_v1_batch_delete_test_cases_response.py +9 -2
  393. eval_studio_client/api/test/test_v1_batch_delete_tests_request.py +1 -1
  394. eval_studio_client/api/test/test_v1_batch_delete_tests_response.py +1 -1
  395. eval_studio_client/api/test/test_v1_batch_delete_workflows_request.py +53 -0
  396. eval_studio_client/api/test/test_v1_batch_delete_workflows_response.py +95 -0
  397. eval_studio_client/api/test/test_v1_batch_get_dashboards_response.py +1 -1
  398. eval_studio_client/api/test/test_v1_batch_get_documents_response.py +1 -1
  399. eval_studio_client/api/test/test_v1_batch_get_leaderboards_response.py +1 -1
  400. eval_studio_client/api/test/test_v1_batch_get_models_response.py +1 -1
  401. eval_studio_client/api/test/test_v1_batch_get_operations_response.py +1 -1
  402. eval_studio_client/api/test/test_v1_batch_get_tests_response.py +1 -1
  403. eval_studio_client/api/test/test_v1_batch_get_workflow_edges_response.py +64 -0
  404. eval_studio_client/api/test/test_v1_batch_get_workflow_nodes_response.py +84 -0
  405. eval_studio_client/api/test/test_v1_batch_import_leaderboard_request.py +1 -1
  406. eval_studio_client/api/test/test_v1_batch_import_leaderboard_response.py +1 -1
  407. eval_studio_client/api/test/test_v1_batch_import_tests_request.py +1 -1
  408. eval_studio_client/api/test/test_v1_batch_import_tests_response.py +1 -1
  409. eval_studio_client/api/test/test_v1_check_base_models_response.py +1 -1
  410. eval_studio_client/api/test/test_v1_clone_test_response.py +67 -0
  411. eval_studio_client/api/test/test_v1_clone_workflow_response.py +93 -0
  412. eval_studio_client/api/test/test_v1_collection_info.py +1 -1
  413. eval_studio_client/api/test/test_v1_context.py +54 -0
  414. eval_studio_client/api/test/test_v1_create_dashboard_response.py +1 -1
  415. eval_studio_client/api/test/test_v1_create_document_response.py +1 -1
  416. eval_studio_client/api/test/test_v1_create_evaluation_request.py +25 -3
  417. eval_studio_client/api/test/test_v1_create_evaluator_response.py +1 -1
  418. eval_studio_client/api/test/test_v1_create_leaderboard_request.py +1 -1
  419. eval_studio_client/api/test/test_v1_create_leaderboard_response.py +1 -1
  420. eval_studio_client/api/test/test_v1_create_leaderboard_without_cache_response.py +1 -1
  421. eval_studio_client/api/test/test_v1_create_model_response.py +1 -1
  422. eval_studio_client/api/test/test_v1_create_perturbation_response.py +1 -1
  423. eval_studio_client/api/test/test_v1_create_test_case_response.py +9 -2
  424. eval_studio_client/api/test/test_v1_create_test_lab_response.py +1 -1
  425. eval_studio_client/api/test/test_v1_create_test_response.py +1 -1
  426. eval_studio_client/api/test/test_v1_create_workflow_edge_response.py +62 -0
  427. eval_studio_client/api/test/test_v1_create_workflow_node_response.py +82 -0
  428. eval_studio_client/api/test/test_v1_create_workflow_response.py +93 -0
  429. eval_studio_client/api/test/test_v1_dashboard.py +1 -1
  430. eval_studio_client/api/test/test_v1_dashboard_status.py +1 -1
  431. eval_studio_client/api/test/test_v1_delete_dashboard_response.py +1 -1
  432. eval_studio_client/api/test/test_v1_delete_document_response.py +1 -1
  433. eval_studio_client/api/test/test_v1_delete_evaluator_response.py +1 -1
  434. eval_studio_client/api/test/test_v1_delete_leaderboard_response.py +1 -1
  435. eval_studio_client/api/test/test_v1_delete_model_response.py +1 -1
  436. eval_studio_client/api/test/test_v1_delete_test_case_response.py +9 -2
  437. eval_studio_client/api/test/test_v1_delete_test_response.py +1 -1
  438. eval_studio_client/api/test/test_v1_delete_workflow_edge_response.py +62 -0
  439. eval_studio_client/api/test/test_v1_delete_workflow_node_response.py +82 -0
  440. eval_studio_client/api/test/test_v1_delete_workflow_response.py +93 -0
  441. eval_studio_client/api/test/test_v1_document.py +1 -1
  442. eval_studio_client/api/test/test_v1_estimate_threshold_request.py +60 -0
  443. eval_studio_client/api/test/test_v1_evaluation_test.py +9 -2
  444. eval_studio_client/api/test/test_v1_evaluator.py +1 -1
  445. eval_studio_client/api/test/test_v1_evaluator_param_type.py +1 -1
  446. eval_studio_client/api/test/test_v1_evaluator_parameter.py +1 -1
  447. eval_studio_client/api/test/test_v1_evaluator_view.py +1 -1
  448. eval_studio_client/api/test/test_v1_finalize_operation_response.py +1 -1
  449. eval_studio_client/api/test/test_v1_find_all_test_cases_by_id_response.py +9 -2
  450. eval_studio_client/api/test/test_v1_find_test_lab_response.py +1 -1
  451. eval_studio_client/api/test/test_v1_generate_test_cases_response.py +1 -1
  452. eval_studio_client/api/test/test_v1_generated_test_case.py +79 -0
  453. eval_studio_client/api/test/test_v1_get_dashboard_response.py +1 -1
  454. eval_studio_client/api/test/test_v1_get_document_response.py +1 -1
  455. eval_studio_client/api/test/test_v1_get_evaluator_response.py +1 -1
  456. eval_studio_client/api/test/test_v1_get_info_response.py +7 -2
  457. eval_studio_client/api/test/test_v1_get_leaderboard_report_response.py +175 -0
  458. eval_studio_client/api/test/test_v1_get_leaderboard_response.py +1 -1
  459. eval_studio_client/api/test/test_v1_get_model_response.py +1 -1
  460. eval_studio_client/api/test/test_v1_get_operation_progress_by_parent_response.py +1 -1
  461. eval_studio_client/api/test/test_v1_get_operation_response.py +1 -1
  462. eval_studio_client/api/test/test_v1_get_perturbator_response.py +1 -1
  463. eval_studio_client/api/test/test_v1_get_test_case_response.py +9 -2
  464. eval_studio_client/api/test/test_v1_get_test_class_response.py +1 -1
  465. eval_studio_client/api/test/test_v1_get_test_response.py +1 -1
  466. eval_studio_client/api/test/test_v1_get_workflow_node_prerequisites_response.py +56 -0
  467. eval_studio_client/api/test/test_v1_get_workflow_node_response.py +82 -0
  468. eval_studio_client/api/test/test_v1_get_workflow_response.py +93 -0
  469. eval_studio_client/api/test/test_v1_import_evaluation_request.py +17 -2
  470. eval_studio_client/api/test/test_v1_import_leaderboard_request.py +1 -1
  471. eval_studio_client/api/test/test_v1_import_leaderboard_response.py +1 -1
  472. eval_studio_client/api/test/test_v1_import_test_cases_from_library_response.py +71 -0
  473. eval_studio_client/api/test/test_v1_import_test_cases_request.py +57 -0
  474. eval_studio_client/api/test/test_v1_info.py +7 -2
  475. eval_studio_client/api/test/test_v1_init_workflow_node_response.py +82 -0
  476. eval_studio_client/api/test/test_v1_insight.py +1 -1
  477. eval_studio_client/api/test/test_v1_labeled_test_case.py +53 -0
  478. eval_studio_client/api/test/test_v1_leaderboard.py +1 -1
  479. eval_studio_client/api/test/test_v1_leaderboard_report.py +174 -0
  480. eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_data.py +52 -0
  481. eval_studio_client/api/test/test_v1_leaderboard_report_actual_output_meta.py +56 -0
  482. eval_studio_client/api/test/test_v1_leaderboard_report_evaluator.py +114 -0
  483. eval_studio_client/api/test/test_v1_leaderboard_report_evaluator_parameter.py +63 -0
  484. eval_studio_client/api/test/test_v1_leaderboard_report_explanation.py +58 -0
  485. eval_studio_client/api/test/test_v1_leaderboard_report_metrics_meta_entry.py +66 -0
  486. eval_studio_client/api/test/test_v1_leaderboard_report_model.py +62 -0
  487. eval_studio_client/api/test/test_v1_leaderboard_report_result.py +92 -0
  488. eval_studio_client/api/test/test_v1_leaderboard_report_result_relationship.py +53 -0
  489. eval_studio_client/api/test/test_v1_leaderboard_status.py +1 -1
  490. eval_studio_client/api/test/test_v1_leaderboard_type.py +1 -1
  491. eval_studio_client/api/test/test_v1_leaderboard_view.py +1 -1
  492. eval_studio_client/api/test/test_v1_list_base_models_response.py +1 -1
  493. eval_studio_client/api/test/test_v1_list_dashboards_response.py +1 -1
  494. eval_studio_client/api/test/test_v1_list_documents_response.py +1 -1
  495. eval_studio_client/api/test/test_v1_list_evaluators_response.py +1 -1
  496. eval_studio_client/api/test/test_v1_list_leaderboards_response.py +1 -1
  497. eval_studio_client/api/test/test_v1_list_llm_models_response.py +1 -1
  498. eval_studio_client/api/test/test_v1_list_model_collections_response.py +1 -1
  499. eval_studio_client/api/test/test_v1_list_models_response.py +1 -1
  500. eval_studio_client/api/test/test_v1_list_most_recent_dashboards_response.py +1 -1
  501. eval_studio_client/api/test/test_v1_list_most_recent_leaderboards_response.py +1 -1
  502. eval_studio_client/api/test/test_v1_list_most_recent_models_response.py +1 -1
  503. eval_studio_client/api/test/test_v1_list_most_recent_tests_response.py +1 -1
  504. eval_studio_client/api/test/test_v1_list_operations_response.py +1 -1
  505. eval_studio_client/api/test/test_v1_list_perturbators_response.py +1 -1
  506. eval_studio_client/api/test/test_v1_list_prompt_library_items_response.py +71 -0
  507. eval_studio_client/api/test/test_v1_list_rag_collections_response.py +1 -1
  508. eval_studio_client/api/test/test_v1_list_test_case_library_items_response.py +71 -0
  509. eval_studio_client/api/test/test_v1_list_test_case_relationships_response.py +56 -0
  510. eval_studio_client/api/test/test_v1_list_test_cases_response.py +9 -2
  511. eval_studio_client/api/test/test_v1_list_test_classes_response.py +1 -1
  512. eval_studio_client/api/test/test_v1_list_tests_response.py +1 -1
  513. eval_studio_client/api/test/test_v1_list_workflows_response.py +95 -0
  514. eval_studio_client/api/test/test_v1_metric_score.py +52 -0
  515. eval_studio_client/api/test/test_v1_metric_scores.py +55 -0
  516. eval_studio_client/api/test/test_v1_model.py +1 -1
  517. eval_studio_client/api/test/test_v1_model_type.py +1 -1
  518. eval_studio_client/api/test/test_v1_operation.py +1 -1
  519. eval_studio_client/api/test/test_v1_operation_progress.py +1 -1
  520. eval_studio_client/api/test/test_v1_perturb_test_in_place_response.py +67 -0
  521. eval_studio_client/api/test/test_v1_perturb_test_response.py +1 -1
  522. eval_studio_client/api/test/test_v1_perturbator.py +1 -1
  523. eval_studio_client/api/test/test_v1_perturbator_configuration.py +1 -1
  524. eval_studio_client/api/test/test_v1_perturbator_intensity.py +1 -1
  525. eval_studio_client/api/test/test_v1_problem_and_action.py +1 -1
  526. eval_studio_client/api/test/test_v1_process_workflow_node_response.py +71 -0
  527. eval_studio_client/api/test/test_v1_prompt_library_item.py +68 -0
  528. eval_studio_client/api/test/test_v1_repeated_string.py +53 -0
  529. eval_studio_client/api/test/test_v1_reset_workflow_node_response.py +82 -0
  530. eval_studio_client/api/test/test_v1_test.py +1 -1
  531. eval_studio_client/api/test/test_v1_test_case.py +9 -2
  532. eval_studio_client/api/test/test_v1_test_case_relationship.py +1 -1
  533. eval_studio_client/api/test/test_v1_test_cases_generator.py +1 -1
  534. eval_studio_client/api/test/test_v1_test_class.py +1 -1
  535. eval_studio_client/api/test/test_v1_test_class_type.py +1 -1
  536. eval_studio_client/api/test/test_v1_test_lab.py +1 -1
  537. eval_studio_client/api/test/test_v1_test_suite_evaluates.py +33 -0
  538. eval_studio_client/api/test/test_v1_update_dashboard_response.py +1 -1
  539. eval_studio_client/api/test/test_v1_update_document_response.py +1 -1
  540. eval_studio_client/api/test/test_v1_update_leaderboard_response.py +1 -1
  541. eval_studio_client/api/test/test_v1_update_model_response.py +1 -1
  542. eval_studio_client/api/test/test_v1_update_operation_response.py +1 -1
  543. eval_studio_client/api/test/test_v1_update_test_case_response.py +9 -2
  544. eval_studio_client/api/test/test_v1_update_test_response.py +1 -1
  545. eval_studio_client/api/test/test_v1_update_workflow_node_response.py +82 -0
  546. eval_studio_client/api/test/test_v1_update_workflow_response.py +93 -0
  547. eval_studio_client/api/test/test_v1_who_am_i_response.py +1 -1
  548. eval_studio_client/api/test/test_v1_workflow.py +92 -0
  549. eval_studio_client/api/test/test_v1_workflow_edge.py +61 -0
  550. eval_studio_client/api/test/test_v1_workflow_edge_type.py +33 -0
  551. eval_studio_client/api/test/test_v1_workflow_node.py +81 -0
  552. eval_studio_client/api/test/test_v1_workflow_node_artifact.py +61 -0
  553. eval_studio_client/api/test/test_v1_workflow_node_artifacts.py +64 -0
  554. eval_studio_client/api/test/test_v1_workflow_node_attributes.py +51 -0
  555. eval_studio_client/api/test/test_v1_workflow_node_status.py +33 -0
  556. eval_studio_client/api/test/test_v1_workflow_node_type.py +33 -0
  557. eval_studio_client/api/test/test_v1_workflow_node_view.py +33 -0
  558. eval_studio_client/api/test/test_v1_workflow_type.py +33 -0
  559. eval_studio_client/api/test/test_who_am_i_service_api.py +1 -1
  560. eval_studio_client/api/test/test_workflow_edge_service_api.py +52 -0
  561. eval_studio_client/api/test/test_workflow_node_service_api.py +94 -0
  562. eval_studio_client/api/test/test_workflow_service_api.py +80 -0
  563. eval_studio_client/api/test/test_workflow_service_clone_workflow_request.py +55 -0
  564. eval_studio_client/client.py +7 -0
  565. eval_studio_client/dashboards.py +66 -18
  566. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +5132 -1847
  567. eval_studio_client/leaderboards.py +125 -0
  568. eval_studio_client/models.py +3 -42
  569. eval_studio_client/test_labs.py +49 -21
  570. eval_studio_client/tests.py +323 -58
  571. eval_studio_client/utils.py +26 -0
  572. {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.1.0a5.dist-info}/METADATA +2 -3
  573. eval_studio_client-1.1.0a5.dist-info/RECORD +720 -0
  574. {eval_studio_client-1.0.0a1.dist-info → eval_studio_client-1.1.0a5.dist-info}/WHEEL +1 -1
  575. eval_studio_client-1.0.0a1.dist-info/RECORD +0 -485
@@ -1,6 +1,7 @@
1
1
  import dataclasses
2
2
  import datetime
3
3
  import json
4
+ import os
4
5
  import time
5
6
  from typing import Dict
6
7
  from typing import List
@@ -33,12 +34,14 @@ class Leaderboard:
33
34
  update_time: Optional[datetime.datetime] = None
34
35
  problems: List[p6s.Problem] = dataclasses.field(default_factory=list)
35
36
  insights: List[i6s.Insight] = dataclasses.field(default_factory=list)
37
+ summary: Optional[str] = None
36
38
  existing_collection: Optional[str] = None
37
39
  _report: Optional[str] = None
38
40
  _leaderboard: Optional[str] = None
39
41
  _model_name: Optional[str] = None
40
42
  _status: Optional[models.V1LeaderboardStatus] = None
41
43
  _client: Optional[api.ApiClient] = None
44
+ _operation: Optional[str] = None
42
45
 
43
46
  def __post_init__(self):
44
47
  self._evaluator_api = api.EvaluatorServiceApi(self._client)
@@ -85,6 +88,42 @@ class Leaderboard:
85
88
  if self._client:
86
89
  self._leaderboard_api.leaderboard_service_delete_leaderboard(self.key)
87
90
 
91
def download_result(self, dest: str):
    """Downloads the leaderboard result to a JSON file.

    Args:
        dest (str): The destination for the result. Either an existing
            directory, in which case the file is stored in it as
            ``results.json``, or a file path whose parent directory exists
            (the file itself is created or overwritten).

    Raises:
        ValueError: If the destination directory does not exist, or if the
            leaderboard has no client or is not finished yet.
        RuntimeError: If the server answers with a status other than 200.
    """
    if os.path.isdir(dest):
        dest = os.path.join(dest, "results.json")
    elif not dest or not os.path.isdir(os.path.dirname(os.path.abspath(dest))):
        # Only the parent directory has to exist. Requiring the target file
        # itself to exist would make it impossible to download to a new file.
        raise ValueError("Destination path does not exist.")

    if not (self._client and self.finished):
        raise ValueError("Cannot download result for unfinished leaderboard.")

    headers: Dict[str, str] = {}
    url = urljoin(self._client.configuration.host, f"/content/{self.key}/results")
    # The request is sent with urllib3 directly, so the API client is asked to
    # contribute its auth parameters first (presumably it fills `headers` in
    # place - confirm against the generated ApiClient).
    self._client.update_params_for_auth(
        headers=headers,
        queries=[],
        auth_settings=[],
        resource_path=url,
        method="GET",
        body=None,
    )
    response = urllib3.request("GET", url, headers=headers)

    if response.status != 200:
        raise RuntimeError("Failed to retrieve leaderboard result.")

    with open(dest, "wb") as f:
        f.write(response.data)
126
+
88
127
  def download_report(self, dest: str):
89
128
  """Downloads the leaderboard report to a zip file.
90
129
 
@@ -113,6 +152,30 @@ class Leaderboard:
113
152
 
114
153
  raise ValueError("Cannot download report for unfinished leaderboard.")
115
154
 
155
def get_result_json(self) -> str:
    """Retrieves the leaderboard result as a JSON string.

    Returns:
        str: The response body decoded as UTF-8 text.

    Raises:
        ValueError: If the leaderboard has no client or is not finished yet.
        RuntimeError: If the server answers with a status other than 200.
    """
    if not (self._client and self.finished):
        raise ValueError("Cannot download result for unfinished leaderboard.")

    headers: Dict[str, str] = {}
    url = urljoin(self._client.configuration.host, f"/content/{self.key}/results")
    # The request is sent with urllib3 directly, so the API client is asked to
    # contribute its auth parameters first (presumably it fills `headers` in
    # place - confirm against the generated ApiClient).
    self._client.update_params_for_auth(
        headers=headers,
        queries=[],
        auth_settings=[],
        resource_path=url,
        method="GET",
        body=None,
    )
    response = urllib3.request("GET", url, headers=headers)

    if response.status != 200:
        raise RuntimeError("Failed to retrieve leaderboard result.")

    # `response.data` is bytes; str() on bytes yields the "b'...'" repr
    # instead of the payload, so the body has to be decoded explicitly.
    return response.data.decode("utf-8")
+ raise ValueError("Cannot download result for unfinished leaderboard.")
178
+
116
179
  def get_table(self) -> LeaderboardTable:
117
180
  """Retrieves the leaderboard table."""
118
181
  if self._client and self.finished:
@@ -169,6 +232,7 @@ class Leaderboard:
169
232
  """Refresh the leaderboard with the latest API data."""
170
233
  self.key = api_leaderboard.name or ""
171
234
  self.update_time = api_leaderboard.update_time
235
+ self.summary = api_leaderboard.leaderboard_summary
172
236
  self._leaderboard = api_leaderboard.leaderboard_table
173
237
  self._report = api_leaderboard.leaderboard_report or ""
174
238
  self._status = api_leaderboard.status
@@ -191,6 +255,7 @@ class Leaderboard:
191
255
  update_time=api_leaderboard.update_time,
192
256
  problems=problems,
193
257
  insights=insights,
258
+ summary=api_leaderboard.leaderboard_summary,
194
259
  existing_collection=api_leaderboard.h2ogpte_collection or None,
195
260
  _evaluator_name=api_leaderboard.evaluator or "",
196
261
  _test_names=api_leaderboard.tests or [],
@@ -198,6 +263,7 @@ class Leaderboard:
198
263
  _leaderboard=api_leaderboard.leaderboard_table,
199
264
  _status=api_leaderboard.status,
200
265
  _client=client,
266
+ _operation=api_leaderboard.create_operation or None,
201
267
  )
202
268
 
203
269
  @staticmethod
@@ -206,3 +272,62 @@ class Leaderboard:
206
272
  models.V1LeaderboardStatus.LEADERBOARD_STATUS_COMPLETED,
207
273
  models.V1LeaderboardStatus.LEADERBOARD_STATUS_FAILED,
208
274
  ]
275
+
276
@staticmethod
def from_operation(
    operation: models.V1Operation, client: Optional[api.ApiClient]
) -> Optional["Leaderboard"]:
    """Retrieves the leaderboard from the operation, which created it.

    Args:
        operation: The operation that created the leaderboard.
        client: The API client to use for the leaderboard retrieval.

    Returns:
        Leaderboard: The leaderboard instance created by the operation, or
        `None` if the API response does not contain a leaderboard.

    Raises:
        RuntimeError: If no client is provided, the operation has no
            metadata, or the metadata does not reference a leaderboard.
    """
    if not client:
        raise RuntimeError("API Client is not provided")

    if not operation.metadata:
        raise RuntimeError(
            "Operation metadata missing, it's not possible to retrieve leaderboard from operation"
        )

    leaderboard_id = operation.metadata.to_dict().get("leaderboard")
    if not leaderboard_id:
        # Fail before talking to the server: asking the API for an empty
        # resource name cannot return the leaderboard of this operation.
        raise RuntimeError(
            "Operation metadata does not reference a leaderboard, "
            "it's not possible to retrieve leaderboard from operation"
        )

    leaderboard_api = api.LeaderboardServiceApi(client)
    res = leaderboard_api.leaderboard_service_get_leaderboard(str(leaderboard_id))
    if res and res.leaderboard:
        return Leaderboard._from_api_leaderboard(res.leaderboard, client)

    return None
304
+
305
+
306
+ class _Leaderboards:
307
+ def __init__(self, client: api.ApiClient):
308
+ self._client = client
309
+ self._api = api.LeaderboardServiceApi(client)
310
+
311
+ def get(self, key: str) -> Leaderboard:
312
+ """Gets an individual leaderboard with a given key from Eval Studio.
313
+
314
+ Args:
315
+ key: The leaderboard resource name to retrieve.
316
+ """
317
+ res = self._api.leaderboard_service_get_leaderboard(key)
318
+ if res and res.leaderboard:
319
+ return Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
320
+
321
+ raise KeyError("Leaderboard not found.")
322
+
323
+ def list(self) -> List[Leaderboard]:
324
+ """Lists all user leaderboards in Eval Studio."""
325
+ res = self._api.leaderboard_service_list_leaderboards()
326
+ if res:
327
+ res_leaderboards = res.leaderboards or []
328
+ return [
329
+ Leaderboard._from_api_leaderboard(lb, self._client)
330
+ for lb in res_leaderboards
331
+ ]
332
+
333
+ return []
@@ -168,7 +168,7 @@ class Model:
168
168
  )
169
169
 
170
170
  if res and res.operation:
171
- return self._get_leaderboard_from_operation(res.operation)
171
+ return l10s.Leaderboard.from_operation(res.operation, self._client)
172
172
 
173
173
  return None
174
174
 
@@ -226,7 +226,7 @@ class Model:
226
226
  )
227
227
 
228
228
  if res and res.operation:
229
- return self._get_dashboard_from_operation(res.operation)
229
+ return d8s.Dashboard.from_operation(res.operation, self._client)
230
230
 
231
231
  return None
232
232
 
@@ -257,7 +257,7 @@ class Model:
257
257
  )
258
258
  res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
259
259
  if res and res.operation:
260
- return self._get_leaderboard_from_operation(res.operation)
260
+ return l10s.Leaderboard.from_operation(res.operation, self._client)
261
261
 
262
262
  return None
263
263
 
@@ -273,45 +273,6 @@ class Model:
273
273
 
274
274
  raise RuntimeError("Failed to list base models")
275
275
 
276
- def _get_leaderboard_from_operation(
277
- self, operation: models.V1Operation
278
- ) -> Optional[l10s.Leaderboard]:
279
- """Retrieves the leaderboard from the operation, which created it.
280
-
281
- Args:
282
- operation: The operation that created the leaderboard.
283
- """
284
- if not operation.metadata:
285
- raise RuntimeError("Not possible to retrieve leaderboard from operation")
286
-
287
- leadeboard_id = operation.metadata.to_dict().get("leaderboard")
288
- res = self._leaderboard_api.leaderboard_service_get_leaderboard(leadeboard_id)
289
- if res and res.leaderboard:
290
- return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
291
-
292
- return None
293
-
294
- def _get_dashboard_from_operation(
295
- self, operation: models.V1Operation
296
- ) -> Optional[d8s.Dashboard]:
297
- """Retrieves the dashboard from the operation, which created it.
298
-
299
- Args:
300
- operation: The operation that created the dashboard.
301
- """
302
- if not self._client:
303
- raise RuntimeError("Client is not set.")
304
-
305
- if not operation.metadata:
306
- raise RuntimeError("Not possible to retrieve dashboard from operation")
307
-
308
- dashboard_id = operation.metadata.to_dict().get("dashboard")
309
- res = self._dashboard_api.dashboard_service_get_dashboard(dashboard_id)
310
- if res and res.dashboard:
311
- return d8s.Dashboard._from_api_dashboard(res.dashboard, self._client)
312
-
313
- return None
314
-
315
276
  @staticmethod
316
277
  def _from_api_model(api_model: models.V1Model, client: api.ApiClient) -> "Model":
317
278
  """Converts the API model to the client model."""
@@ -7,7 +7,8 @@ from typing import Union
7
7
  import uuid
8
8
 
9
9
  from eval_studio_client import api
10
- from eval_studio_client import evaluators
10
+ from eval_studio_client import dashboards
11
+ from eval_studio_client import evaluators as e8s
11
12
  from eval_studio_client import leaderboards as l10s
12
13
  from eval_studio_client.api import models as apiModels
13
14
 
@@ -92,11 +93,56 @@ class TestLab:
92
93
  self._models.append(_m)
93
94
  return _m
94
95
 
95
- def evaluate(self, evaluator: evaluators.Evaluator) -> Optional[l10s.Leaderboard]:
96
def evaluate(
    self,
    evaluators: Union[e8s.Evaluator, List[e8s.Evaluator]],
    name: Optional[str] = None,
    description: Optional[str] = None,
) -> Optional[dashboards.Dashboard]:
    """Launches an evaluation of the test lab with one or more evaluators.

    Args:
        evaluators (Union[e8s.Evaluator, List[e8s.Evaluator]]): A single
            evaluator or a list of evaluators to run on the test lab.
        name (str, optional): Name of the evaluation. Falls back to the test
            lab name and then to "Imported Dashboard".
        description (str, optional): Description of the evaluation. Falls
            back to the test lab description and then to an empty string.

    Returns:
        Dashboard: The dashboard built from the operation returned by the
            batch import call, or `None` when the response carries no
            operation.
    """
    if isinstance(evaluators, e8s.Evaluator):
        evaluator_list = [evaluators]
    else:
        evaluator_list = evaluators

    dashboard_name = name or self.name or "Imported Dashboard"
    dashboard_description = description or self.description or ""

    request = apiModels.V1BatchImportLeaderboardRequest(
        testLabJson=self.json(),
        evaluators=[evaluator.key for evaluator in evaluator_list],
        model=None,
        dashboardDisplayName=dashboard_name,
        dashboardDescription=dashboard_description,
        testDisplayName=f"{dashboard_name} - Test",
        testDescription=f"Test suite for {dashboard_description}",
    )
    response = self._leaderboard_api.leaderboard_service_batch_import_leaderboard(
        request
    )

    if not (response and response.operation):
        return None

    return dashboards.Dashboard.from_operation(response.operation, self._client)
134
+
135
+ def create_leaderboard(
136
+ self, evaluator: e8s.Evaluator
137
+ ) -> Optional[l10s.Leaderboard]:
138
+ """Creates a single leaderboard for the test lab.
139
+
98
140
  Args:
99
141
  evaluator: The evaluator to use for the evaluation.
142
+
143
+ Returns:
144
+ Leaderboard: Single evaluation leaderboard instance.
145
+ In case launching of evaluation fails, `None` is returned.
100
146
  """
101
147
  req = apiModels.V1ImportLeaderboardRequest(
102
148
  testLabJson=self.json(),
@@ -109,7 +155,7 @@ class TestLab:
109
155
  )
110
156
  res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
111
157
  if res and res.operation:
112
- return self._get_leaderboard_from_operation(res.operation)
158
+ return l10s.Leaderboard.from_operation(res.operation, self._client)
113
159
 
114
160
  return None
115
161
 
@@ -131,24 +177,6 @@ class TestLab:
131
177
 
132
178
  return json.dumps(lab, indent=4, sort_keys=True)
133
179
 
134
- def _get_leaderboard_from_operation(
135
- self, operation: apiModels.V1Operation
136
- ) -> Optional[l10s.Leaderboard]:
137
- """Retrieves the leaderboard from the operation, which created it.
138
-
139
- Args:
140
- operation: The operation that created the leaderboard.
141
- """
142
- if not operation.metadata:
143
- raise RuntimeError("Not possible to retrieve leaderboard from operation")
144
-
145
- leadeboard_id = operation.metadata.to_dict().get("leaderboard")
146
- res = self._leaderboard_api.leaderboard_service_get_leaderboard(leadeboard_id)
147
- if res and res.leaderboard:
148
- return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
149
-
150
- return None
151
-
152
180
  def _llm_model_names(self) -> List[str]:
153
181
  return [m.llm_model_name for m in self.models]
154
182