eval_studio_client-0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470)
  1. eval_studio_client/__about__.py +1 -0
  2. eval_studio_client/__init__.py +4 -0
  3. eval_studio_client/api/__init__.py +180 -0
  4. eval_studio_client/api/api/__init__.py +20 -0
  5. eval_studio_client/api/api/dashboard_service_api.py +2142 -0
  6. eval_studio_client/api/api/document_service_api.py +1868 -0
  7. eval_studio_client/api/api/evaluation_service_api.py +1603 -0
  8. eval_studio_client/api/api/evaluator_service_api.py +1343 -0
  9. eval_studio_client/api/api/info_service_api.py +275 -0
  10. eval_studio_client/api/api/leaderboard_service_api.py +3336 -0
  11. eval_studio_client/api/api/model_service_api.py +2913 -0
  12. eval_studio_client/api/api/operation_progress_service_api.py +292 -0
  13. eval_studio_client/api/api/operation_service_api.py +1359 -0
  14. eval_studio_client/api/api/perturbation_service_api.py +321 -0
  15. eval_studio_client/api/api/perturbator_service_api.py +532 -0
  16. eval_studio_client/api/api/test_case_service_api.py +1913 -0
  17. eval_studio_client/api/api/test_class_service_api.py +532 -0
  18. eval_studio_client/api/api/test_lab_service_api.py +634 -0
  19. eval_studio_client/api/api/test_service_api.py +2712 -0
  20. eval_studio_client/api/api/who_am_i_service_api.py +275 -0
  21. eval_studio_client/api/api_client.py +770 -0
  22. eval_studio_client/api/api_response.py +21 -0
  23. eval_studio_client/api/configuration.py +436 -0
  24. eval_studio_client/api/docs/DashboardServiceApi.md +549 -0
  25. eval_studio_client/api/docs/DocumentServiceApi.md +478 -0
  26. eval_studio_client/api/docs/EvaluationServiceApi.md +332 -0
  27. eval_studio_client/api/docs/EvaluatorServiceApi.md +345 -0
  28. eval_studio_client/api/docs/InfoServiceApi.md +71 -0
  29. eval_studio_client/api/docs/LeaderboardServiceApi.md +835 -0
  30. eval_studio_client/api/docs/ModelServiceApi.md +750 -0
  31. eval_studio_client/api/docs/OperationProgressServiceApi.md +75 -0
  32. eval_studio_client/api/docs/OperationServiceApi.md +345 -0
  33. eval_studio_client/api/docs/PerturbationServiceApi.md +78 -0
  34. eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +31 -0
  35. eval_studio_client/api/docs/PerturbatorServiceApi.md +138 -0
  36. eval_studio_client/api/docs/ProtobufAny.md +30 -0
  37. eval_studio_client/api/docs/RequiredTheDashboardToUpdate.md +41 -0
  38. eval_studio_client/api/docs/RequiredTheDocumentToUpdate.md +38 -0
  39. eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +54 -0
  40. eval_studio_client/api/docs/RequiredTheModelToUpdate.md +41 -0
  41. eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +39 -0
  42. eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +39 -0
  43. eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +39 -0
  44. eval_studio_client/api/docs/RequiredTheTestToUpdate.md +39 -0
  45. eval_studio_client/api/docs/RpcStatus.md +32 -0
  46. eval_studio_client/api/docs/TestCaseServiceApi.md +486 -0
  47. eval_studio_client/api/docs/TestCaseServiceBatchDeleteTestCasesRequest.md +29 -0
  48. eval_studio_client/api/docs/TestClassServiceApi.md +138 -0
  49. eval_studio_client/api/docs/TestLabServiceApi.md +151 -0
  50. eval_studio_client/api/docs/TestServiceApi.md +689 -0
  51. eval_studio_client/api/docs/TestServicePerturbTestRequest.md +31 -0
  52. eval_studio_client/api/docs/V1alphaBatchCreateLeaderboardsRequest.md +31 -0
  53. eval_studio_client/api/docs/V1alphaBatchCreateLeaderboardsResponse.md +29 -0
  54. eval_studio_client/api/docs/V1alphaBatchDeleteDashboardsRequest.md +29 -0
  55. eval_studio_client/api/docs/V1alphaBatchDeleteDashboardsResponse.md +29 -0
  56. eval_studio_client/api/docs/V1alphaBatchDeleteDocumentsRequest.md +29 -0
  57. eval_studio_client/api/docs/V1alphaBatchDeleteDocumentsResponse.md +29 -0
  58. eval_studio_client/api/docs/V1alphaBatchDeleteEvaluatorsRequest.md +29 -0
  59. eval_studio_client/api/docs/V1alphaBatchDeleteEvaluatorsResponse.md +29 -0
  60. eval_studio_client/api/docs/V1alphaBatchDeleteLeaderboardsRequest.md +30 -0
  61. eval_studio_client/api/docs/V1alphaBatchDeleteLeaderboardsResponse.md +29 -0
  62. eval_studio_client/api/docs/V1alphaBatchDeleteModelsRequest.md +29 -0
  63. eval_studio_client/api/docs/V1alphaBatchDeleteModelsResponse.md +29 -0
  64. eval_studio_client/api/docs/V1alphaBatchDeleteTestCasesResponse.md +29 -0
  65. eval_studio_client/api/docs/V1alphaBatchDeleteTestsRequest.md +30 -0
  66. eval_studio_client/api/docs/V1alphaBatchDeleteTestsResponse.md +29 -0
  67. eval_studio_client/api/docs/V1alphaBatchGetDashboardsResponse.md +29 -0
  68. eval_studio_client/api/docs/V1alphaBatchGetDocumentsResponse.md +29 -0
  69. eval_studio_client/api/docs/V1alphaBatchGetLeaderboardsResponse.md +29 -0
  70. eval_studio_client/api/docs/V1alphaBatchGetModelsResponse.md +29 -0
  71. eval_studio_client/api/docs/V1alphaBatchGetOperationsResponse.md +29 -0
  72. eval_studio_client/api/docs/V1alphaBatchGetTestsResponse.md +29 -0
  73. eval_studio_client/api/docs/V1alphaBatchImportLeaderboardRequest.md +37 -0
  74. eval_studio_client/api/docs/V1alphaBatchImportLeaderboardResponse.md +29 -0
  75. eval_studio_client/api/docs/V1alphaBatchImportTestsRequest.md +32 -0
  76. eval_studio_client/api/docs/V1alphaBatchImportTestsResponse.md +29 -0
  77. eval_studio_client/api/docs/V1alphaCheckBaseModelsResponse.md +30 -0
  78. eval_studio_client/api/docs/V1alphaCollectionInfo.md +33 -0
  79. eval_studio_client/api/docs/V1alphaCreateDashboardResponse.md +29 -0
  80. eval_studio_client/api/docs/V1alphaCreateDocumentResponse.md +29 -0
  81. eval_studio_client/api/docs/V1alphaCreateEvaluationRequest.md +37 -0
  82. eval_studio_client/api/docs/V1alphaCreateEvaluatorResponse.md +29 -0
  83. eval_studio_client/api/docs/V1alphaCreateLeaderboardRequest.md +29 -0
  84. eval_studio_client/api/docs/V1alphaCreateLeaderboardResponse.md +29 -0
  85. eval_studio_client/api/docs/V1alphaCreateLeaderboardWithoutCacheResponse.md +29 -0
  86. eval_studio_client/api/docs/V1alphaCreateModelResponse.md +29 -0
  87. eval_studio_client/api/docs/V1alphaCreatePerturbationResponse.md +29 -0
  88. eval_studio_client/api/docs/V1alphaCreateTestCaseResponse.md +29 -0
  89. eval_studio_client/api/docs/V1alphaCreateTestLabResponse.md +29 -0
  90. eval_studio_client/api/docs/V1alphaCreateTestResponse.md +29 -0
  91. eval_studio_client/api/docs/V1alphaDashboard.md +41 -0
  92. eval_studio_client/api/docs/V1alphaDashboardStatus.md +12 -0
  93. eval_studio_client/api/docs/V1alphaDeleteDashboardResponse.md +29 -0
  94. eval_studio_client/api/docs/V1alphaDeleteDocumentResponse.md +29 -0
  95. eval_studio_client/api/docs/V1alphaDeleteEvaluatorResponse.md +29 -0
  96. eval_studio_client/api/docs/V1alphaDeleteLeaderboardResponse.md +29 -0
  97. eval_studio_client/api/docs/V1alphaDeleteModelResponse.md +29 -0
  98. eval_studio_client/api/docs/V1alphaDeleteTestCaseResponse.md +29 -0
  99. eval_studio_client/api/docs/V1alphaDeleteTestResponse.md +29 -0
  100. eval_studio_client/api/docs/V1alphaDocument.md +38 -0
  101. eval_studio_client/api/docs/V1alphaEvaluationTest.md +32 -0
  102. eval_studio_client/api/docs/V1alphaEvaluator.md +45 -0
  103. eval_studio_client/api/docs/V1alphaEvaluatorParamType.md +12 -0
  104. eval_studio_client/api/docs/V1alphaEvaluatorParameter.md +40 -0
  105. eval_studio_client/api/docs/V1alphaEvaluatorView.md +12 -0
  106. eval_studio_client/api/docs/V1alphaFinalizeOperationResponse.md +29 -0
  107. eval_studio_client/api/docs/V1alphaFindAllTestCasesByIDResponse.md +29 -0
  108. eval_studio_client/api/docs/V1alphaFindTestLabResponse.md +29 -0
  109. eval_studio_client/api/docs/V1alphaGetDashboardResponse.md +29 -0
  110. eval_studio_client/api/docs/V1alphaGetDocumentResponse.md +29 -0
  111. eval_studio_client/api/docs/V1alphaGetEvaluatorResponse.md +29 -0
  112. eval_studio_client/api/docs/V1alphaGetInfoResponse.md +29 -0
  113. eval_studio_client/api/docs/V1alphaGetLeaderboardResponse.md +29 -0
  114. eval_studio_client/api/docs/V1alphaGetModelResponse.md +29 -0
  115. eval_studio_client/api/docs/V1alphaGetOperationProgressByParentResponse.md +29 -0
  116. eval_studio_client/api/docs/V1alphaGetOperationResponse.md +29 -0
  117. eval_studio_client/api/docs/V1alphaGetPerturbatorResponse.md +29 -0
  118. eval_studio_client/api/docs/V1alphaGetTestCaseResponse.md +29 -0
  119. eval_studio_client/api/docs/V1alphaGetTestClassResponse.md +29 -0
  120. eval_studio_client/api/docs/V1alphaGetTestResponse.md +29 -0
  121. eval_studio_client/api/docs/V1alphaImportEvaluationRequest.md +33 -0
  122. eval_studio_client/api/docs/V1alphaImportLeaderboardRequest.md +37 -0
  123. eval_studio_client/api/docs/V1alphaImportLeaderboardResponse.md +29 -0
  124. eval_studio_client/api/docs/V1alphaInfo.md +35 -0
  125. eval_studio_client/api/docs/V1alphaInsight.md +40 -0
  126. eval_studio_client/api/docs/V1alphaLeaderboard.md +54 -0
  127. eval_studio_client/api/docs/V1alphaLeaderboardStatus.md +12 -0
  128. eval_studio_client/api/docs/V1alphaLeaderboardType.md +12 -0
  129. eval_studio_client/api/docs/V1alphaLeaderboardView.md +12 -0
  130. eval_studio_client/api/docs/V1alphaListBaseModelsResponse.md +29 -0
  131. eval_studio_client/api/docs/V1alphaListDashboardsResponse.md +29 -0
  132. eval_studio_client/api/docs/V1alphaListDocumentsResponse.md +29 -0
  133. eval_studio_client/api/docs/V1alphaListEvaluatorsResponse.md +29 -0
  134. eval_studio_client/api/docs/V1alphaListLLMModelsResponse.md +29 -0
  135. eval_studio_client/api/docs/V1alphaListLeaderboardsResponse.md +30 -0
  136. eval_studio_client/api/docs/V1alphaListModelCollectionsResponse.md +29 -0
  137. eval_studio_client/api/docs/V1alphaListModelsResponse.md +29 -0
  138. eval_studio_client/api/docs/V1alphaListMostRecentDashboardsResponse.md +29 -0
  139. eval_studio_client/api/docs/V1alphaListMostRecentLeaderboardsResponse.md +29 -0
  140. eval_studio_client/api/docs/V1alphaListMostRecentModelsResponse.md +29 -0
  141. eval_studio_client/api/docs/V1alphaListMostRecentTestsResponse.md +29 -0
  142. eval_studio_client/api/docs/V1alphaListOperationsResponse.md +29 -0
  143. eval_studio_client/api/docs/V1alphaListPerturbatorsResponse.md +29 -0
  144. eval_studio_client/api/docs/V1alphaListRAGCollectionsResponse.md +29 -0
  145. eval_studio_client/api/docs/V1alphaListTestCasesResponse.md +29 -0
  146. eval_studio_client/api/docs/V1alphaListTestClassesResponse.md +29 -0
  147. eval_studio_client/api/docs/V1alphaListTestsResponse.md +29 -0
  148. eval_studio_client/api/docs/V1alphaModel.md +42 -0
  149. eval_studio_client/api/docs/V1alphaModelType.md +12 -0
  150. eval_studio_client/api/docs/V1alphaOperation.md +40 -0
  151. eval_studio_client/api/docs/V1alphaOperationProgress.md +32 -0
  152. eval_studio_client/api/docs/V1alphaPerturbTestResponse.md +29 -0
  153. eval_studio_client/api/docs/V1alphaPerturbator.md +39 -0
  154. eval_studio_client/api/docs/V1alphaPerturbatorConfiguration.md +32 -0
  155. eval_studio_client/api/docs/V1alphaPerturbatorIntensity.md +11 -0
  156. eval_studio_client/api/docs/V1alphaProblemAndAction.md +39 -0
  157. eval_studio_client/api/docs/V1alphaTest.md +40 -0
  158. eval_studio_client/api/docs/V1alphaTestCase.md +40 -0
  159. eval_studio_client/api/docs/V1alphaTestCaseRelationship.md +31 -0
  160. eval_studio_client/api/docs/V1alphaTestClass.md +41 -0
  161. eval_studio_client/api/docs/V1alphaTestClassType.md +12 -0
  162. eval_studio_client/api/docs/V1alphaTestLab.md +41 -0
  163. eval_studio_client/api/docs/V1alphaUpdateDashboardResponse.md +29 -0
  164. eval_studio_client/api/docs/V1alphaUpdateDocumentResponse.md +29 -0
  165. eval_studio_client/api/docs/V1alphaUpdateLeaderboardResponse.md +29 -0
  166. eval_studio_client/api/docs/V1alphaUpdateModelResponse.md +29 -0
  167. eval_studio_client/api/docs/V1alphaUpdateOperationResponse.md +29 -0
  168. eval_studio_client/api/docs/V1alphaUpdateTestCaseResponse.md +29 -0
  169. eval_studio_client/api/docs/V1alphaUpdateTestResponse.md +29 -0
  170. eval_studio_client/api/docs/V1alphaWhoAmIResponse.md +31 -0
  171. eval_studio_client/api/docs/WhoAmIServiceApi.md +72 -0
  172. eval_studio_client/api/exceptions.py +199 -0
  173. eval_studio_client/api/models/__init__.py +148 -0
  174. eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +115 -0
  175. eval_studio_client/api/models/protobuf_any.py +100 -0
  176. eval_studio_client/api/models/required_the_dashboard_to_update.py +127 -0
  177. eval_studio_client/api/models/required_the_document_to_update.py +116 -0
  178. eval_studio_client/api/models/required_the_leaderboard_to_update.py +178 -0
  179. eval_studio_client/api/models/required_the_model_to_update.py +127 -0
  180. eval_studio_client/api/models/required_the_operation_to_finalize.py +129 -0
  181. eval_studio_client/api/models/required_the_operation_to_update.py +129 -0
  182. eval_studio_client/api/models/required_the_test_case_to_update.py +120 -0
  183. eval_studio_client/api/models/required_the_test_to_update.py +122 -0
  184. eval_studio_client/api/models/rpc_status.py +99 -0
  185. eval_studio_client/api/models/test_case_service_batch_delete_test_cases_request.py +87 -0
  186. eval_studio_client/api/models/test_service_perturb_test_request.py +99 -0
  187. eval_studio_client/api/models/v1alpha_batch_create_leaderboards_request.py +99 -0
  188. eval_studio_client/api/models/v1alpha_batch_create_leaderboards_response.py +91 -0
  189. eval_studio_client/api/models/v1alpha_batch_delete_dashboards_request.py +87 -0
  190. eval_studio_client/api/models/v1alpha_batch_delete_dashboards_response.py +95 -0
  191. eval_studio_client/api/models/v1alpha_batch_delete_documents_request.py +87 -0
  192. eval_studio_client/api/models/v1alpha_batch_delete_documents_response.py +95 -0
  193. eval_studio_client/api/models/v1alpha_batch_delete_evaluators_request.py +87 -0
  194. eval_studio_client/api/models/v1alpha_batch_delete_evaluators_response.py +95 -0
  195. eval_studio_client/api/models/v1alpha_batch_delete_leaderboards_request.py +90 -0
  196. eval_studio_client/api/models/v1alpha_batch_delete_leaderboards_response.py +95 -0
  197. eval_studio_client/api/models/v1alpha_batch_delete_models_request.py +87 -0
  198. eval_studio_client/api/models/v1alpha_batch_delete_models_response.py +95 -0
  199. eval_studio_client/api/models/v1alpha_batch_delete_test_cases_response.py +95 -0
  200. eval_studio_client/api/models/v1alpha_batch_delete_tests_request.py +89 -0
  201. eval_studio_client/api/models/v1alpha_batch_delete_tests_response.py +95 -0
  202. eval_studio_client/api/models/v1alpha_batch_get_dashboards_response.py +95 -0
  203. eval_studio_client/api/models/v1alpha_batch_get_documents_response.py +95 -0
  204. eval_studio_client/api/models/v1alpha_batch_get_leaderboards_response.py +95 -0
  205. eval_studio_client/api/models/v1alpha_batch_get_models_response.py +95 -0
  206. eval_studio_client/api/models/v1alpha_batch_get_operations_response.py +95 -0
  207. eval_studio_client/api/models/v1alpha_batch_get_tests_response.py +95 -0
  208. eval_studio_client/api/models/v1alpha_batch_import_leaderboard_request.py +104 -0
  209. eval_studio_client/api/models/v1alpha_batch_import_leaderboard_response.py +91 -0
  210. eval_studio_client/api/models/v1alpha_batch_import_tests_request.py +93 -0
  211. eval_studio_client/api/models/v1alpha_batch_import_tests_response.py +95 -0
  212. eval_studio_client/api/models/v1alpha_check_base_models_response.py +89 -0
  213. eval_studio_client/api/models/v1alpha_collection_info.py +93 -0
  214. eval_studio_client/api/models/v1alpha_create_dashboard_response.py +91 -0
  215. eval_studio_client/api/models/v1alpha_create_document_response.py +91 -0
  216. eval_studio_client/api/models/v1alpha_create_evaluation_request.py +115 -0
  217. eval_studio_client/api/models/v1alpha_create_evaluator_response.py +91 -0
  218. eval_studio_client/api/models/v1alpha_create_leaderboard_request.py +91 -0
  219. eval_studio_client/api/models/v1alpha_create_leaderboard_response.py +91 -0
  220. eval_studio_client/api/models/v1alpha_create_leaderboard_without_cache_response.py +91 -0
  221. eval_studio_client/api/models/v1alpha_create_model_response.py +91 -0
  222. eval_studio_client/api/models/v1alpha_create_perturbation_response.py +87 -0
  223. eval_studio_client/api/models/v1alpha_create_test_case_response.py +91 -0
  224. eval_studio_client/api/models/v1alpha_create_test_lab_response.py +91 -0
  225. eval_studio_client/api/models/v1alpha_create_test_response.py +91 -0
  226. eval_studio_client/api/models/v1alpha_dashboard.py +131 -0
  227. eval_studio_client/api/models/v1alpha_dashboard_status.py +39 -0
  228. eval_studio_client/api/models/v1alpha_delete_dashboard_response.py +91 -0
  229. eval_studio_client/api/models/v1alpha_delete_document_response.py +91 -0
  230. eval_studio_client/api/models/v1alpha_delete_evaluator_response.py +91 -0
  231. eval_studio_client/api/models/v1alpha_delete_leaderboard_response.py +91 -0
  232. eval_studio_client/api/models/v1alpha_delete_model_response.py +91 -0
  233. eval_studio_client/api/models/v1alpha_delete_test_case_response.py +91 -0
  234. eval_studio_client/api/models/v1alpha_delete_test_response.py +91 -0
  235. eval_studio_client/api/models/v1alpha_document.py +120 -0
  236. eval_studio_client/api/models/v1alpha_evaluation_test.py +107 -0
  237. eval_studio_client/api/models/v1alpha_evaluator.py +155 -0
  238. eval_studio_client/api/models/v1alpha_evaluator_param_type.py +42 -0
  239. eval_studio_client/api/models/v1alpha_evaluator_parameter.py +126 -0
  240. eval_studio_client/api/models/v1alpha_evaluator_view.py +38 -0
  241. eval_studio_client/api/models/v1alpha_finalize_operation_response.py +91 -0
  242. eval_studio_client/api/models/v1alpha_find_all_test_cases_by_id_response.py +95 -0
  243. eval_studio_client/api/models/v1alpha_find_test_lab_response.py +91 -0
  244. eval_studio_client/api/models/v1alpha_get_dashboard_response.py +91 -0
  245. eval_studio_client/api/models/v1alpha_get_document_response.py +91 -0
  246. eval_studio_client/api/models/v1alpha_get_evaluator_response.py +91 -0
  247. eval_studio_client/api/models/v1alpha_get_info_response.py +91 -0
  248. eval_studio_client/api/models/v1alpha_get_leaderboard_response.py +91 -0
  249. eval_studio_client/api/models/v1alpha_get_model_response.py +91 -0
  250. eval_studio_client/api/models/v1alpha_get_operation_progress_by_parent_response.py +91 -0
  251. eval_studio_client/api/models/v1alpha_get_operation_response.py +91 -0
  252. eval_studio_client/api/models/v1alpha_get_perturbator_response.py +91 -0
  253. eval_studio_client/api/models/v1alpha_get_test_case_response.py +91 -0
  254. eval_studio_client/api/models/v1alpha_get_test_class_response.py +91 -0
  255. eval_studio_client/api/models/v1alpha_get_test_response.py +91 -0
  256. eval_studio_client/api/models/v1alpha_import_evaluation_request.py +99 -0
  257. eval_studio_client/api/models/v1alpha_import_leaderboard_request.py +104 -0
  258. eval_studio_client/api/models/v1alpha_import_leaderboard_response.py +91 -0
  259. eval_studio_client/api/models/v1alpha_info.py +99 -0
  260. eval_studio_client/api/models/v1alpha_insight.py +107 -0
  261. eval_studio_client/api/models/v1alpha_leaderboard.py +182 -0
  262. eval_studio_client/api/models/v1alpha_leaderboard_status.py +39 -0
  263. eval_studio_client/api/models/v1alpha_leaderboard_type.py +39 -0
  264. eval_studio_client/api/models/v1alpha_leaderboard_view.py +39 -0
  265. eval_studio_client/api/models/v1alpha_list_base_models_response.py +87 -0
  266. eval_studio_client/api/models/v1alpha_list_dashboards_response.py +95 -0
  267. eval_studio_client/api/models/v1alpha_list_documents_response.py +95 -0
  268. eval_studio_client/api/models/v1alpha_list_evaluators_response.py +95 -0
  269. eval_studio_client/api/models/v1alpha_list_leaderboards_response.py +97 -0
  270. eval_studio_client/api/models/v1alpha_list_llm_models_response.py +87 -0
  271. eval_studio_client/api/models/v1alpha_list_model_collections_response.py +95 -0
  272. eval_studio_client/api/models/v1alpha_list_models_response.py +95 -0
  273. eval_studio_client/api/models/v1alpha_list_most_recent_dashboards_response.py +95 -0
  274. eval_studio_client/api/models/v1alpha_list_most_recent_leaderboards_response.py +95 -0
  275. eval_studio_client/api/models/v1alpha_list_most_recent_models_response.py +95 -0
  276. eval_studio_client/api/models/v1alpha_list_most_recent_tests_response.py +95 -0
  277. eval_studio_client/api/models/v1alpha_list_operations_response.py +95 -0
  278. eval_studio_client/api/models/v1alpha_list_perturbators_response.py +95 -0
  279. eval_studio_client/api/models/v1alpha_list_rag_collections_response.py +95 -0
  280. eval_studio_client/api/models/v1alpha_list_test_cases_response.py +95 -0
  281. eval_studio_client/api/models/v1alpha_list_test_classes_response.py +95 -0
  282. eval_studio_client/api/models/v1alpha_list_tests_response.py +95 -0
  283. eval_studio_client/api/models/v1alpha_model.py +131 -0
  284. eval_studio_client/api/models/v1alpha_model_type.py +46 -0
  285. eval_studio_client/api/models/v1alpha_operation.py +133 -0
  286. eval_studio_client/api/models/v1alpha_operation_progress.py +99 -0
  287. eval_studio_client/api/models/v1alpha_perturb_test_response.py +91 -0
  288. eval_studio_client/api/models/v1alpha_perturbator.py +122 -0
  289. eval_studio_client/api/models/v1alpha_perturbator_configuration.py +92 -0
  290. eval_studio_client/api/models/v1alpha_perturbator_intensity.py +39 -0
  291. eval_studio_client/api/models/v1alpha_problem_and_action.py +129 -0
  292. eval_studio_client/api/models/v1alpha_test.py +126 -0
  293. eval_studio_client/api/models/v1alpha_test_case.py +124 -0
  294. eval_studio_client/api/models/v1alpha_test_case_relationship.py +91 -0
  295. eval_studio_client/api/models/v1alpha_test_class.py +127 -0
  296. eval_studio_client/api/models/v1alpha_test_class_type.py +42 -0
  297. eval_studio_client/api/models/v1alpha_test_lab.py +137 -0
  298. eval_studio_client/api/models/v1alpha_update_dashboard_response.py +91 -0
  299. eval_studio_client/api/models/v1alpha_update_document_response.py +91 -0
  300. eval_studio_client/api/models/v1alpha_update_leaderboard_response.py +91 -0
  301. eval_studio_client/api/models/v1alpha_update_model_response.py +91 -0
  302. eval_studio_client/api/models/v1alpha_update_operation_response.py +91 -0
  303. eval_studio_client/api/models/v1alpha_update_test_case_response.py +91 -0
  304. eval_studio_client/api/models/v1alpha_update_test_response.py +91 -0
  305. eval_studio_client/api/models/v1alpha_who_am_i_response.py +91 -0
  306. eval_studio_client/api/rest.py +257 -0
  307. eval_studio_client/api/test/__init__.py +0 -0
  308. eval_studio_client/api/test/test_dashboard_service_api.py +79 -0
  309. eval_studio_client/api/test/test_document_service_api.py +73 -0
  310. eval_studio_client/api/test/test_evaluation_service_api.py +55 -0
  311. eval_studio_client/api/test/test_evaluator_service_api.py +61 -0
  312. eval_studio_client/api/test/test_info_service_api.py +37 -0
  313. eval_studio_client/api/test/test_leaderboard_service_api.py +103 -0
  314. eval_studio_client/api/test/test_model_service_api.py +97 -0
  315. eval_studio_client/api/test/test_operation_progress_service_api.py +37 -0
  316. eval_studio_client/api/test/test_operation_service_api.py +61 -0
  317. eval_studio_client/api/test/test_perturbation_service_api.py +37 -0
  318. eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +79 -0
  319. eval_studio_client/api/test/test_perturbator_service_api.py +43 -0
  320. eval_studio_client/api/test/test_protobuf_any.py +51 -0
  321. eval_studio_client/api/test/test_required_the_dashboard_to_update.py +64 -0
  322. eval_studio_client/api/test/test_required_the_document_to_update.py +59 -0
  323. eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +115 -0
  324. eval_studio_client/api/test/test_required_the_model_to_update.py +63 -0
  325. eval_studio_client/api/test/test_required_the_operation_to_finalize.py +71 -0
  326. eval_studio_client/api/test/test_required_the_operation_to_update.py +71 -0
  327. eval_studio_client/api/test/test_required_the_test_case_to_update.py +63 -0
  328. eval_studio_client/api/test/test_required_the_test_to_update.py +65 -0
  329. eval_studio_client/api/test/test_rpc_status.py +57 -0
  330. eval_studio_client/api/test/test_test_case_service_api.py +73 -0
  331. eval_studio_client/api/test/test_test_case_service_batch_delete_test_cases_request.py +53 -0
  332. eval_studio_client/api/test/test_test_class_service_api.py +43 -0
  333. eval_studio_client/api/test/test_test_lab_service_api.py +43 -0
  334. eval_studio_client/api/test/test_test_service_api.py +91 -0
  335. eval_studio_client/api/test/test_test_service_perturb_test_request.py +58 -0
  336. eval_studio_client/api/test/test_v1alpha_batch_create_leaderboards_request.py +119 -0
  337. eval_studio_client/api/test/test_v1alpha_batch_create_leaderboards_response.py +71 -0
  338. eval_studio_client/api/test/test_v1alpha_batch_delete_dashboards_request.py +53 -0
  339. eval_studio_client/api/test/test_v1alpha_batch_delete_dashboards_response.py +68 -0
  340. eval_studio_client/api/test/test_v1alpha_batch_delete_documents_request.py +53 -0
  341. eval_studio_client/api/test/test_v1alpha_batch_delete_documents_response.py +63 -0
  342. eval_studio_client/api/test/test_v1alpha_batch_delete_evaluators_request.py +53 -0
  343. eval_studio_client/api/test/test_v1alpha_batch_delete_evaluators_response.py +91 -0
  344. eval_studio_client/api/test/test_v1alpha_batch_delete_leaderboards_request.py +54 -0
  345. eval_studio_client/api/test/test_v1alpha_batch_delete_leaderboards_response.py +116 -0
  346. eval_studio_client/api/test/test_v1alpha_batch_delete_models_request.py +53 -0
  347. eval_studio_client/api/test/test_v1alpha_batch_delete_models_response.py +67 -0
  348. eval_studio_client/api/test/test_v1alpha_batch_delete_test_cases_response.py +67 -0
  349. eval_studio_client/api/test/test_v1alpha_batch_delete_tests_request.py +54 -0
  350. eval_studio_client/api/test/test_v1alpha_batch_delete_tests_response.py +69 -0
  351. eval_studio_client/api/test/test_v1alpha_batch_get_dashboards_response.py +68 -0
  352. eval_studio_client/api/test/test_v1alpha_batch_get_documents_response.py +63 -0
  353. eval_studio_client/api/test/test_v1alpha_batch_get_leaderboards_response.py +116 -0
  354. eval_studio_client/api/test/test_v1alpha_batch_get_models_response.py +67 -0
  355. eval_studio_client/api/test/test_v1alpha_batch_get_operations_response.py +73 -0
  356. eval_studio_client/api/test/test_v1alpha_batch_get_tests_response.py +69 -0
  357. eval_studio_client/api/test/test_v1alpha_batch_import_leaderboard_request.py +61 -0
  358. eval_studio_client/api/test/test_v1alpha_batch_import_leaderboard_response.py +71 -0
  359. eval_studio_client/api/test/test_v1alpha_batch_import_tests_request.py +54 -0
  360. eval_studio_client/api/test/test_v1alpha_batch_import_tests_response.py +69 -0
  361. eval_studio_client/api/test/test_v1alpha_check_base_models_response.py +52 -0
  362. eval_studio_client/api/test/test_v1alpha_collection_info.py +54 -0
  363. eval_studio_client/api/test/test_v1alpha_create_dashboard_response.py +66 -0
  364. eval_studio_client/api/test/test_v1alpha_create_document_response.py +61 -0
  365. eval_studio_client/api/test/test_v1alpha_create_evaluation_request.py +107 -0
  366. eval_studio_client/api/test/test_v1alpha_create_evaluator_response.py +89 -0
  367. eval_studio_client/api/test/test_v1alpha_create_leaderboard_request.py +114 -0
  368. eval_studio_client/api/test/test_v1alpha_create_leaderboard_response.py +71 -0
  369. eval_studio_client/api/test/test_v1alpha_create_leaderboard_without_cache_response.py +71 -0
  370. eval_studio_client/api/test/test_v1alpha_create_model_response.py +65 -0
  371. eval_studio_client/api/test/test_v1alpha_create_perturbation_response.py +51 -0
  372. eval_studio_client/api/test/test_v1alpha_create_test_case_response.py +65 -0
  373. eval_studio_client/api/test/test_v1alpha_create_test_lab_response.py +68 -0
  374. eval_studio_client/api/test/test_v1alpha_create_test_response.py +67 -0
  375. eval_studio_client/api/test/test_v1alpha_dashboard.py +65 -0
  376. eval_studio_client/api/test/test_v1alpha_dashboard_status.py +33 -0
  377. eval_studio_client/api/test/test_v1alpha_delete_dashboard_response.py +66 -0
  378. eval_studio_client/api/test/test_v1alpha_delete_document_response.py +61 -0
  379. eval_studio_client/api/test/test_v1alpha_delete_evaluator_response.py +89 -0
  380. eval_studio_client/api/test/test_v1alpha_delete_leaderboard_response.py +114 -0
  381. eval_studio_client/api/test/test_v1alpha_delete_model_response.py +65 -0
  382. eval_studio_client/api/test/test_v1alpha_delete_test_case_response.py +65 -0
  383. eval_studio_client/api/test/test_v1alpha_delete_test_response.py +67 -0
  384. eval_studio_client/api/test/test_v1alpha_document.py +60 -0
  385. eval_studio_client/api/test/test_v1alpha_evaluation_test.py +76 -0
  386. eval_studio_client/api/test/test_v1alpha_evaluator.py +91 -0
  387. eval_studio_client/api/test/test_v1alpha_evaluator_param_type.py +33 -0
  388. eval_studio_client/api/test/test_v1alpha_evaluator_parameter.py +68 -0
  389. eval_studio_client/api/test/test_v1alpha_evaluator_view.py +33 -0
  390. eval_studio_client/api/test/test_v1alpha_finalize_operation_response.py +71 -0
  391. eval_studio_client/api/test/test_v1alpha_find_all_test_cases_by_id_response.py +67 -0
  392. eval_studio_client/api/test/test_v1alpha_find_test_lab_response.py +68 -0
  393. eval_studio_client/api/test/test_v1alpha_get_dashboard_response.py +66 -0
  394. eval_studio_client/api/test/test_v1alpha_get_document_response.py +61 -0
  395. eval_studio_client/api/test/test_v1alpha_get_evaluator_response.py +89 -0
  396. eval_studio_client/api/test/test_v1alpha_get_info_response.py +60 -0
  397. eval_studio_client/api/test/test_v1alpha_get_leaderboard_response.py +114 -0
  398. eval_studio_client/api/test/test_v1alpha_get_model_response.py +65 -0
  399. eval_studio_client/api/test/test_v1alpha_get_operation_progress_by_parent_response.py +55 -0
  400. eval_studio_client/api/test/test_v1alpha_get_operation_response.py +71 -0
  401. eval_studio_client/api/test/test_v1alpha_get_perturbator_response.py +64 -0
  402. eval_studio_client/api/test/test_v1alpha_get_test_case_response.py +65 -0
  403. eval_studio_client/api/test/test_v1alpha_get_test_class_response.py +70 -0
  404. eval_studio_client/api/test/test_v1alpha_get_test_response.py +67 -0
  405. eval_studio_client/api/test/test_v1alpha_import_evaluation_request.py +73 -0
  406. eval_studio_client/api/test/test_v1alpha_import_leaderboard_request.py +59 -0
  407. eval_studio_client/api/test/test_v1alpha_import_leaderboard_response.py +71 -0
  408. eval_studio_client/api/test/test_v1alpha_info.py +59 -0
  409. eval_studio_client/api/test/test_v1alpha_insight.py +67 -0
  410. eval_studio_client/api/test/test_v1alpha_leaderboard.py +116 -0
  411. eval_studio_client/api/test/test_v1alpha_leaderboard_status.py +33 -0
  412. eval_studio_client/api/test/test_v1alpha_leaderboard_type.py +33 -0
  413. eval_studio_client/api/test/test_v1alpha_leaderboard_view.py +33 -0
  414. eval_studio_client/api/test/test_v1alpha_list_base_models_response.py +53 -0
  415. eval_studio_client/api/test/test_v1alpha_list_dashboards_response.py +68 -0
  416. eval_studio_client/api/test/test_v1alpha_list_documents_response.py +63 -0
  417. eval_studio_client/api/test/test_v1alpha_list_evaluators_response.py +91 -0
  418. eval_studio_client/api/test/test_v1alpha_list_leaderboards_response.py +117 -0
  419. eval_studio_client/api/test/test_v1alpha_list_llm_models_response.py +53 -0
  420. eval_studio_client/api/test/test_v1alpha_list_model_collections_response.py +57 -0
  421. eval_studio_client/api/test/test_v1alpha_list_models_response.py +67 -0
  422. eval_studio_client/api/test/test_v1alpha_list_most_recent_dashboards_response.py +68 -0
  423. eval_studio_client/api/test/test_v1alpha_list_most_recent_leaderboards_response.py +116 -0
  424. eval_studio_client/api/test/test_v1alpha_list_most_recent_models_response.py +67 -0
  425. eval_studio_client/api/test/test_v1alpha_list_most_recent_tests_response.py +69 -0
  426. eval_studio_client/api/test/test_v1alpha_list_operations_response.py +73 -0
  427. eval_studio_client/api/test/test_v1alpha_list_perturbators_response.py +66 -0
  428. eval_studio_client/api/test/test_v1alpha_list_rag_collections_response.py +57 -0
  429. eval_studio_client/api/test/test_v1alpha_list_test_cases_response.py +67 -0
  430. eval_studio_client/api/test/test_v1alpha_list_test_classes_response.py +72 -0
  431. eval_studio_client/api/test/test_v1alpha_list_tests_response.py +69 -0
  432. eval_studio_client/api/test/test_v1alpha_model.py +64 -0
  433. eval_studio_client/api/test/test_v1alpha_model_type.py +33 -0
  434. eval_studio_client/api/test/test_v1alpha_operation.py +72 -0
  435. eval_studio_client/api/test/test_v1alpha_operation_progress.py +54 -0
  436. eval_studio_client/api/test/test_v1alpha_perturb_test_response.py +67 -0
  437. eval_studio_client/api/test/test_v1alpha_perturbator.py +63 -0
  438. eval_studio_client/api/test/test_v1alpha_perturbator_configuration.py +53 -0
  439. eval_studio_client/api/test/test_v1alpha_perturbator_intensity.py +33 -0
  440. eval_studio_client/api/test/test_v1alpha_problem_and_action.py +65 -0
  441. eval_studio_client/api/test/test_v1alpha_test.py +66 -0
  442. eval_studio_client/api/test/test_v1alpha_test_case.py +64 -0
  443. eval_studio_client/api/test/test_v1alpha_test_case_relationship.py +53 -0
  444. eval_studio_client/api/test/test_v1alpha_test_class.py +69 -0
  445. eval_studio_client/api/test/test_v1alpha_test_class_type.py +33 -0
  446. eval_studio_client/api/test/test_v1alpha_test_lab.py +67 -0
  447. eval_studio_client/api/test/test_v1alpha_update_dashboard_response.py +66 -0
  448. eval_studio_client/api/test/test_v1alpha_update_document_response.py +61 -0
  449. eval_studio_client/api/test/test_v1alpha_update_leaderboard_response.py +114 -0
  450. eval_studio_client/api/test/test_v1alpha_update_model_response.py +65 -0
  451. eval_studio_client/api/test/test_v1alpha_update_operation_response.py +71 -0
  452. eval_studio_client/api/test/test_v1alpha_update_test_case_response.py +65 -0
  453. eval_studio_client/api/test/test_v1alpha_update_test_response.py +67 -0
  454. eval_studio_client/api/test/test_v1alpha_who_am_i_response.py +53 -0
  455. eval_studio_client/api/test/test_who_am_i_service_api.py +38 -0
  456. eval_studio_client/client.py +98 -0
  457. eval_studio_client/dashboards.py +187 -0
  458. eval_studio_client/documents.py +95 -0
  459. eval_studio_client/evaluators.py +65 -0
  460. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +6043 -0
  461. eval_studio_client/insights.py +35 -0
  462. eval_studio_client/leaderboards.py +207 -0
  463. eval_studio_client/models.py +522 -0
  464. eval_studio_client/perturbators.py +101 -0
  465. eval_studio_client/problems.py +50 -0
  466. eval_studio_client/test_labs.py +319 -0
  467. eval_studio_client/tests.py +369 -0
  468. eval_studio_client-0.7.0.dist-info/METADATA +18 -0
  469. eval_studio_client-0.7.0.dist-info/RECORD +470 -0
  470. eval_studio_client-0.7.0.dist-info/WHEEL +4 -0
eval_studio_client/models.py
@@ -0,0 +1,522 @@
+import dataclasses
+import datetime
+import json
+from typing import List
+from typing import Optional
+from typing import Union
+
+from eval_studio_client import api
+from eval_studio_client import dashboards as d8s
+from eval_studio_client import evaluators as e8s
+from eval_studio_client import leaderboards as l10s
+from eval_studio_client import tests
+from eval_studio_client.api import models
+
+# Key for Azure environment ID parameter within Model parameters.
+_AZURE_ENV_ID_PARAM = "environment_id"
+
+
+@dataclasses.dataclass
+class Model:
+    """Represents an Eval Studio connection to an external RAG/LLM system.
+
+    Attributes:
+        key (str): Generated ID of the model.
+        name (str): Name of the model.
+        description (str): Description of the model.
+        url (str): URL of the model host system.
+        api_key (str): API key for the model host system.
+        is_rag (bool): Whether the model is a RAG or LLM-only system.
+        create_time (datetime): Timestamp of the model creation.
+        update_time (datetime): Timestamp of the last model update.
+    """
+
+    key: str
+    name: str
+    description: str
+    url: str
+    api_key: str
+    is_rag: bool
+    create_time: Optional[datetime.datetime] = None
+    update_time: Optional[datetime.datetime] = None
+    _client: Optional[api.ApiClient] = None
+
+    def __post_init__(self):
+        if self._client:
+            self._model_api = api.ModelServiceApi(self._client)
+            self._leaderboard_api = api.LeaderboardServiceApi(self._client)
+            self._dashboard_api = api.DashboardServiceApi(self._client)
+
+    @property
+    def leaderboards(self) -> List[l10s.Leaderboard]:
+        """List of all leaderboards created for this model."""
+        result = []
+        page_token = ""
+        while True:
+            res = self._leaderboard_api.leaderboard_service_list_leaderboards(
+                filter=f'model="{self.key}"',
+                view=models.V1alphaLeaderboardView.LEADERBOARD_VIEW_BASIC_WITH_TABLE,
+                page_token=page_token,
+            )
+            if not res or not res.leaderboards:
+                break
+
+            res_leaderboards = res.leaderboards or []
+            lbs = [
+                l10s.Leaderboard._from_api_leaderboard(lb, self._client)
+                for lb in res_leaderboards
+            ]
+            result.extend(lbs)
+
+            page_token = res.next_page_token
+            if not page_token:
+                break
+
+        return result
+
+    def create_leaderboard(
+        self,
+        name: str,
+        evaluator: e8s.Evaluator,
+        test_suite: List[tests.Test],
+        description: Optional[str] = None,
+        base_models: Optional[List[str]] = None,
+        use_cache: bool = True,
+    ) -> Optional[l10s.Leaderboard]:
+        """Runs a new evaluation for the model and creates a new leaderboard.
+
+        Args:
+            evaluator: The evaluator to use for the evaluation.
+            test_suite: The list of tests used to evaluate the model.
+            base_models (optional): The base LLM models to use for the evaluation.
+            use_cache (optional): Whether to use the cached answers if available.
+        """
+        lb = l10s.Leaderboard(
+            key="",
+            name=name,
+            description=description or "",
+            base_models=base_models or [],
+            _model_name=self.key,
+            _evaluator_name=evaluator.key,
+            _test_names=[t.key for t in test_suite],
+            _client=self._client,
+        )
+        if use_cache:
+            res = self._leaderboard_api.leaderboard_service_create_leaderboard(
+                lb.to_api_proto()
+            )
+        else:
+            res = self._leaderboard_api.leaderboard_service_create_leaderboard_without_cache(
+                lb.to_api_proto()
+            )
+
+        if res and res.operation:
+            return self._get_leaderboard_from_operation(res.operation)
+
+        return None
+
+    def evaluate(
+        self,
+        name: str,
+        evaluators: Union[e8s.Evaluator, List[e8s.Evaluator]],
+        test_suites: Union[tests.Test, List[tests.Test]],
+        description: Optional[str] = None,
+        base_models: Optional[List[str]] = None,
+    ) -> Optional[d8s.Dashboard]:
+        """Runs a new evaluation for the model and creates a new dashboard.
+
+        Args:
+            evaluators: The evaluator(s) to use for the evaluation.
+            test_suites: The test(s) used to evaluate the model.
+            description (optional): The description of the dashboard.
+            base_models (optional): The base LLM models to use for the evaluation.
+        """
+        _evaluators = (
+            [evaluators] if isinstance(evaluators, e8s.Evaluator) else evaluators
+        )
+        _test_suites = (
+            [test_suites] if isinstance(test_suites, tests.Test) else test_suites
+        )
+
+        create_lb_reqs: List[models.V1alphaCreateLeaderboardRequest] = []
+        for evaluator in _evaluators:
+            lb = l10s.Leaderboard(
+                key="",
+                name=f"{name} - {evaluator.name}",
+                description=description or "",
+                base_models=base_models or [],
+                _model_name=self.key,
+                _evaluator_name=evaluator.key,
+                _test_names=[t.key for t in _test_suites],
+                _client=self._client,
+            )
+            create_lb_req = models.V1alphaCreateLeaderboardRequest(
+                leaderboard=lb.to_api_proto()
+            )
+            create_lb_reqs.append(create_lb_req)
+
+        res = self._leaderboard_api.leaderboard_service_batch_create_leaderboards(
+            models.V1alphaBatchCreateLeaderboardsRequest(
+                requests=create_lb_reqs,
+                dashboard_display_name=name,
+                dashboard_description=description,
+            )
+        )
+
+        if res and res.operation:
+            return self._get_dashboard_from_operation(res.operation)
+
+        return None
+
+    def create_leaderboard_from_testlab(
+        self,
+        name: str,
+        evaluator: e8s.Evaluator,
+        test_lab: str,
+        description: Optional[str] = None,
+    ) -> Optional[l10s.Leaderboard]:
+        """Runs an evaluation from a pre-built Test Lab, which contains
+        tests and pre-computed answers.
+
+        Args:
+            name: The name of the leaderboard.
+            evaluator: The evaluator to use for the evaluation.
+            test_lab: The test lab in JSON format to use for the evaluation.
+            description (optional): The description of the leaderboard.
+        """
+        req = models.V1alphaImportLeaderboardRequest(
+            testLabJson=test_lab,
+            evaluator=evaluator.key,
+            model=self.key,
+            leaderboardDisplayName=name,
+            leaderboardDescription=description or "",
+            testDisplayName=f"{name}-Test",
+            testDescription=description or "",
+        )
+        res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
+        if res and res.operation:
+            return self._get_leaderboard_from_operation(res.operation)
+
+        return None
+
+    def delete(self):
+        """Deletes the model."""
+        self._model_api.model_service_delete_model(self.key)
+
+    def list_base_models(self) -> List[str]:
+        """Lists base LLM models available to use for the evaluation."""
+        res = self._model_api.model_service_list_base_models(self.key)
+        if res and res.base_models:
+            return [str(m) for m in res.base_models]
+
+        raise RuntimeError("Failed to list base models")
+
+    def _get_leaderboard_from_operation(
+        self, operation: models.V1alphaOperation
+    ) -> Optional[l10s.Leaderboard]:
+        """Retrieves the leaderboard from the operation that created it.
+
+        Args:
+            operation: The operation that created the leaderboard.
+        """
+        if not operation.metadata:
+            raise RuntimeError("Not possible to retrieve leaderboard from operation")
+
+        leaderboard_id = operation.metadata.to_dict().get("leaderboard")
+        res = self._leaderboard_api.leaderboard_service_get_leaderboard(leaderboard_id)
+        if res and res.leaderboard:
+            return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)
+
+        return None
+
+    def _get_dashboard_from_operation(
+        self, operation: models.V1alphaOperation
+    ) -> Optional[d8s.Dashboard]:
+        """Retrieves the dashboard from the operation that created it.
+
+        Args:
+            operation: The operation that created the dashboard.
+        """
+        if not self._client:
+            raise RuntimeError("Client is not set.")
+
+        if not operation.metadata:
+            raise RuntimeError("Not possible to retrieve dashboard from operation")
+
+        dashboard_id = operation.metadata.to_dict().get("dashboard")
+        res = self._dashboard_api.dashboard_service_get_dashboard(dashboard_id)
+        if res and res.dashboard:
+            return d8s.Dashboard._from_api_dashboard(res.dashboard, self._client)
+
+        return None
+
+    @staticmethod
+    def _from_api_model(
+        api_model: models.V1alphaModel, client: api.ApiClient
+    ) -> "Model":
+        """Converts the API model to the client model."""
+        return Model(
+            key=api_model.name or "",
+            name=api_model.display_name or "",
+            description=api_model.description or "",
+            url=api_model.url or "",
+            api_key=api_model.api_key or "",
+            is_rag=Model._is_rag_model(api_model),
+            create_time=api_model.create_time,
+            update_time=api_model.update_time,
+            _client=client,
+        )
+
+    @staticmethod
+    def _is_rag_model(api_model: models.V1alphaModel) -> bool:
+        return api_model.type in [
+            models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_RAG,
+            models.V1alphaModelType.MODEL_TYPE_OPENAI_RAG,
+        ]
+
+
+class _Models:
+    def __init__(self, client: api.ApiClient):
+        self._client = client
+        self._api = api.ModelServiceApi(client)
+
+    def get(self, key: str) -> Model:
+        """Gets a model with given key from Eval Studio.
+
+        Args:
+            key: The model resource name to retrieve.
+        """
+        res = self._api.model_service_get_model(key)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise KeyError("Model not found.")
+
+    def create_h2ogpte_model(
+        self, name: str, is_rag: bool, description: str, url: str, api_key: str
+    ) -> Model:
+        """Creates a new H2OGPTe model in Eval Studio.
+
+        **Note**: You have to choose between RAG or LLM-only mode for this model.
+
+        Args:
+            name: Name of the model.
+            is_rag:
+                Whether the model is a RAG or LLM-only system, i.e. no context retrieval.
+            description: Description of the model.
+            url: URL of the model host system.
+            api_key: API key for the model host system.
+        """
+        model_type = (
+            models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_RAG
+            if is_rag
+            else models.V1alphaModelType.MODEL_TYPE_H2_OGPTE_LLM
+        )
+        req = models.V1alphaModel(
+            display_name=name,
+            description=description,
+            url=url,
+            api_key=api_key,
+            type=model_type,
+        )
+        res = self._api.model_service_create_model(req)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise RuntimeError("Failed to create H2OGPTe model")
+
+    def create_h2ogpt_model(
+        self, name: str, description: str, url: str, api_key: str
+    ) -> Model:
+        """Creates a new H2OGPT model in Eval Studio.
+
+        Args:
+            name: Name of the model.
+            description: Description of the model.
+            url: URL of the model host system.
+            api_key: API key for the model host system.
+        """
+        req = models.V1alphaModel(
+            display_name=name,
+            description=description,
+            url=url,
+            api_key=api_key,
+            type=models.V1alphaModelType.MODEL_TYPE_H2_OGPT_LLM,
+        )
+        res = self._api.model_service_create_model(req)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise RuntimeError("Failed to create H2OGPT model")
+
+    def create_h2o_llmops_model(
+        self, name: str, description: str, url: str, api_key: str
+    ) -> Model:
+        """Creates a new H2O LLMOps model.
+
+        Args:
+            name: Name of the model.
+            description: Description of the model.
+            url: URL of the model host system.
+            api_key: API key for the model host system.
+        """
+        req = models.V1alphaModel(
+            display_name=name,
+            description=description,
+            url=url,
+            api_key=api_key,
+            type=models.V1alphaModelType.MODEL_TYPE_H2_OLLMOPS,
+        )
+        res = self._api.model_service_create_model(req)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise RuntimeError("Failed to create H2O LLMOps model")
+
+    def create_openai_model(
+        self,
+        name: str,
+        description: str,
+        api_key: str,
+        url: str = "",
+        is_rag: bool = True,
+    ) -> Model:
+        """Creates a new OpenAI model in Eval Studio.
+
+        Args:
+            name: Name of the model.
+            description: Description of the model.
+            api_key: API key for the model host system.
+            url (optional): If not specified, connects to the default OpenAI endpoint.
+                Otherwise a custom OpenAI-compatible API can be used.
+            is_rag (optional): If True, uses the OpenAI Assistants API for RAG.
+                If False, uses plain OpenAI Chat.
+
+        """
+        if url and is_rag:
+            raise ValueError(
+                "OpenAI Assistants are not currently supported on custom OpenAI endpoints."
+            )
+
+        model_type = (
+            models.V1alphaModelType.MODEL_TYPE_OPENAI_RAG
+            if is_rag
+            else models.V1alphaModelType.MODEL_TYPE_OPENAI_CHAT
+        )
+        req = models.V1alphaModel(
+            display_name=name,
+            description=description,
+            api_key=api_key,
+            url=url or None,
+            type=model_type,
+        )
+        res = self._api.model_service_create_model(req)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise RuntimeError("Failed to create OpenAI model")
+
+    def create_azure_openai_model(
+        self, name: str, description: str, url: str, api_key: str, environmentID: str
+    ) -> Model:
+        """Creates a new Azure-hosted OpenAI model in Eval Studio.
+
+        Args:
+            name: Name of the model.
+            description: Description of the model.
+            url: URL of the model host system.
+            api_key: API key for the model host system.
+            environmentID: Azure environment ID.
+        """
+        params = {_AZURE_ENV_ID_PARAM: environmentID}
+        req = models.V1alphaModel(
+            display_name=name,
+            description=description,
+            url=url,
+            api_key=api_key,
+            type=models.V1alphaModelType.MODEL_TYPE_AZURE_OPENAI_CHAT,
+            parameters=json.dumps(params),
+        )
+        res = self._api.model_service_create_model(req)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise RuntimeError("Failed to create Azure model")
+
+    def create_ollama_model(
+        self, name: str, description: str, url: str, api_key: str
+    ) -> Model:
+        """Creates a new OLLAMA model in Eval Studio.
+
+        Args:
+            name: Name of the model.
+            description: Description of the model.
+            url: URL of the model host system.
+            api_key: API key for the model host system.
+        """
+        req = models.V1alphaModel(
+            display_name=name,
+            description=description,
+            url=url,
+            api_key=api_key,
+            type=models.V1alphaModelType.MODEL_TYPE_OLLAMA,
+        )
+        res = self._api.model_service_create_model(req)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise RuntimeError("Failed to create OLLAMA model")
+
+    def create_amazon_bedrock_model(
+        self,
+        name: str,
+        description: str,
+        aws_access_key_id: str,
+        aws_secret_access_key: str,
+        aws_session_token: str,
+        aws_region: str,
+    ) -> Model:
+        """Creates a new Amazon Bedrock model in Eval Studio.
+
+        Args:
+            name: Name of the model.
+            description: Description of the model.
+            aws_access_key_id: AWS access key ID.
+            aws_secret_access_key: AWS secret access key.
+            aws_session_token: AWS session token.
+            aws_region: AWS region.
+        """
+        credentials = {
+            "aws_access_key_id": aws_access_key_id,
+            "aws_secret_access_key": aws_secret_access_key,
+            "aws_session_token": aws_session_token,
+        }
+        req = models.V1alphaModel(
+            display_name=name,
+            description=description,
+            type=models.V1alphaModelType.MODEL_TYPE_AMAZON_BEDROCK,
+            api_key=json.dumps(credentials),
+            parameters=json.dumps({"region": aws_region}),
+        )
+        res = self._api.model_service_create_model(req)
+        if res and res.model:
+            return Model._from_api_model(res.model, self._client)
+
+        raise RuntimeError("Failed to create Amazon Bedrock model")
+
+    def delete(self, key: str):
+        """Deletes a model with given key from Eval Studio.
+
+        Args:
+            key: The model resource name to delete.
+        """
+        self._api.model_service_delete_model(key)
+
+    def list(self) -> List[Model]:
+        """Lists all user models in Eval Studio."""
+        res = self._api.model_service_list_models()
+        if res:
+            res_models = res.models or []
+            return [Model._from_api_model(m, self._client) for m in res_models]
+
+        return []
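For orientation, the sketch below shows how the Model API added above might be used. It is not part of the package diff: the host URL, keys, and resource names are placeholders, it assumes the generated api package exposes Configuration and ApiClient in the usual openapi-generator layout, and it reaches _Models directly, whereas the package presumably wires it up through eval_studio_client/client.py (not shown in this diff).

# Hypothetical usage sketch -- not part of the diffed package code.
from eval_studio_client import api
from eval_studio_client import models as es_models

# Placeholder host and keys; Configuration/ApiClient assumed from the generated api package.
config = api.Configuration(host="https://eval-studio.example.com")
client = api.ApiClient(config)

registry = es_models._Models(client)  # normally reached via the client facade (client.py)
model = registry.create_h2ogpte_model(
    name="docs-rag",
    is_rag=True,
    description="h2oGPTe collection under evaluation",
    url="https://h2ogpte.example.com",  # placeholder
    api_key="***",
)
print(model.key, model.is_rag)

# Each Model can then drive evaluations, e.g. model.evaluate(...) or
# model.create_leaderboard(...); both read the returned operation's metadata
# to resolve the resulting Dashboard or Leaderboard.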
eval_studio_client/perturbators.py
@@ -0,0 +1,101 @@
+import dataclasses
+import enum
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Union
+
+from eval_studio_client import api
+from eval_studio_client.api import models
+
+
+class PerturbatorIntensity(enum.Enum):
+    """Intensity of the perturbator during perturbation."""
+
+    low = "low"
+    medium = "medium"
+    high = "high"
+
+    def to_api_proto(self) -> models.V1alphaPerturbatorIntensity:
+        """Converts the client PerturbatorIntensity to an API PerturbatorIntensity."""
+        proto_values = {
+            PerturbatorIntensity.low: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_LOW,
+            PerturbatorIntensity.medium: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_MEDIUM,
+            PerturbatorIntensity.high: models.V1alphaPerturbatorIntensity.PERTURBATOR_INTENSITY_HIGH,
+        }
+
+        return proto_values[self]
+
+
+@dataclasses.dataclass
+class Perturbator:
+    """Represents a perturbation method in Eval Studio.
+
+    Attributes:
+        key (str): Generated ID of the perturbator.
+        name (str): Display name of the perturbator.
+        description (str): Description of the perturbator.
+        keywords (List[str]): Keywords associated with the perturbator.
+    """
+
+    _intensity: PerturbatorIntensity
+
+    key: str
+    name: str
+    description: str
+    keywords: List[str]
+
+    params: Optional[Dict[str, Any]] = None
+
+    def __post_init__(self):
+        self.intensity = self.intensity or PerturbatorIntensity.medium
+
+    @staticmethod
+    def _from_api_perturbator(
+        api_perturbator: models.V1alphaPerturbator,
+    ) -> "Perturbator":
+        """Converts an API Perturbator to a client Perturbator."""
+        return Perturbator(
+            key=api_perturbator.name or "",
+            name=api_perturbator.display_name or "",
+            description=api_perturbator.description or "",
+            keywords=api_perturbator.tags or [],
+            _intensity=PerturbatorIntensity.medium,
+        )
+
+    @property
+    def intensity(self) -> PerturbatorIntensity:
+        return self._intensity
+
+    @intensity.setter
+    def intensity(self, value: Union[PerturbatorIntensity, str]):
+        if isinstance(value, str):
+            value = PerturbatorIntensity(value)
+        self._intensity = value
+
+
+class _Perturbators:
+    def __init__(self, client: api.ApiClient):
+        self._client = client
+        self._api = api.PerturbatorServiceApi(client)
+
+    def get(self, key: str) -> Perturbator:
+        """Retrieves a perturbator by key.
+
+        Args:
+            key (str): ID of the perturbator.
+        """
+        res = self._api.perturbator_service_get_perturbator(key)
+        if res and res.perturbator:
+            return Perturbator._from_api_perturbator(res.perturbator)
+
+        raise KeyError("Perturbator not found")
+
+    def list(self) -> List[Perturbator]:
+        """Lists all available perturbators in Eval Studio."""
+        res = self._api.perturbator_service_list_perturbators()
+        if res and res.perturbators:
+            return [Perturbator._from_api_perturbator(e) for e in res.perturbators]
+
+        return []
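A similar hedged sketch for the perturbator catalog above; the host and resource name are placeholders, and _Perturbators is again assumed to be reached through the client facade in normal use.

# Hypothetical usage sketch -- not part of the diffed package code.
from eval_studio_client import api
from eval_studio_client import perturbators as p8s

client = api.ApiClient(api.Configuration(host="https://eval-studio.example.com"))  # placeholder
catalog = p8s._Perturbators(client)  # normally reached via the client facade

for p in catalog.list():
    print(p.key, p.name, p.keywords)

p = catalog.get("perturbators/typos")  # placeholder resource name
p.intensity = "high"  # the setter coerces strings to PerturbatorIntensity
print(p.intensity.to_api_proto())  # -> PERTURBATOR_INTENSITY_HIGH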
eval_studio_client/problems.py
@@ -0,0 +1,50 @@
+import dataclasses
+import enum
+from typing import Dict
+from typing import List
+from typing import Optional
+
+from eval_studio_client.api import models
+
+
+class ProblemSeverity(enum.Enum):
+    """Severity of the problem detected during evaluation."""
+
+    low = "low"
+    medium = "medium"
+    high = "high"
+    unknown = "unknown"
+
+
+@dataclasses.dataclass
+class Problem:
+    """A Problem represents an issue detected during evaluation. It is always related
+    to the specific evaluation technique that was used and also contains the
+    suggested actions that could mitigate the problem.
+    """
+
+    description: str
+    severity: ProblemSeverity
+    problem_type: str
+    problem_attrs: Dict[str, str]
+    recommended_actions: str
+    resources: List[str]
+    _evaluator_id: Optional[str] = None
+
+    @staticmethod
+    def _from_api_problem(api_problem: models.V1alphaProblemAndAction) -> "Problem":
+        """Converts an API Problem to a client Problem."""
+        try:
+            severity = ProblemSeverity(api_problem.severity)
+        except ValueError:
+            severity = ProblemSeverity.unknown
+
+        return Problem(
+            description=api_problem.description or "",
+            severity=severity,
+            problem_type=api_problem.problem_type or "",
+            problem_attrs=api_problem.problem_attrs or {},
+            recommended_actions=api_problem.actions_description or "",
+            resources=api_problem.resources or [],
+            _evaluator_id=api_problem.explainer_id,
+        )
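Finally, a small sketch of the Problem conversion above. It assumes the generated V1alphaProblemAndAction model accepts these optional fields as keyword arguments; all field values are made up for illustration.

# Hypothetical usage sketch -- not part of the diffed package code.
from eval_studio_client.api import models
from eval_studio_client.problems import Problem, ProblemSeverity

api_problem = models.V1alphaProblemAndAction(
    description="Answer is not supported by the retrieved context.",  # placeholder
    severity="high",
    problem_type="faithfulness",  # placeholder
    actions_description="Review chunking and retrieval settings.",  # placeholder
)

problem = Problem._from_api_problem(api_problem)
assert problem.severity is ProblemSeverity.high
# Severities outside low/medium/high fall back to ProblemSeverity.unknown
# instead of raising, via the try/except in _from_api_problem.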