eval_studio_client-0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (470)
  1. eval_studio_client/__about__.py +1 -0
  2. eval_studio_client/__init__.py +4 -0
  3. eval_studio_client/api/__init__.py +180 -0
  4. eval_studio_client/api/api/__init__.py +20 -0
  5. eval_studio_client/api/api/dashboard_service_api.py +2142 -0
  6. eval_studio_client/api/api/document_service_api.py +1868 -0
  7. eval_studio_client/api/api/evaluation_service_api.py +1603 -0
  8. eval_studio_client/api/api/evaluator_service_api.py +1343 -0
  9. eval_studio_client/api/api/info_service_api.py +275 -0
  10. eval_studio_client/api/api/leaderboard_service_api.py +3336 -0
  11. eval_studio_client/api/api/model_service_api.py +2913 -0
  12. eval_studio_client/api/api/operation_progress_service_api.py +292 -0
  13. eval_studio_client/api/api/operation_service_api.py +1359 -0
  14. eval_studio_client/api/api/perturbation_service_api.py +321 -0
  15. eval_studio_client/api/api/perturbator_service_api.py +532 -0
  16. eval_studio_client/api/api/test_case_service_api.py +1913 -0
  17. eval_studio_client/api/api/test_class_service_api.py +532 -0
  18. eval_studio_client/api/api/test_lab_service_api.py +634 -0
  19. eval_studio_client/api/api/test_service_api.py +2712 -0
  20. eval_studio_client/api/api/who_am_i_service_api.py +275 -0
  21. eval_studio_client/api/api_client.py +770 -0
  22. eval_studio_client/api/api_response.py +21 -0
  23. eval_studio_client/api/configuration.py +436 -0
  24. eval_studio_client/api/docs/DashboardServiceApi.md +549 -0
  25. eval_studio_client/api/docs/DocumentServiceApi.md +478 -0
  26. eval_studio_client/api/docs/EvaluationServiceApi.md +332 -0
  27. eval_studio_client/api/docs/EvaluatorServiceApi.md +345 -0
  28. eval_studio_client/api/docs/InfoServiceApi.md +71 -0
  29. eval_studio_client/api/docs/LeaderboardServiceApi.md +835 -0
  30. eval_studio_client/api/docs/ModelServiceApi.md +750 -0
  31. eval_studio_client/api/docs/OperationProgressServiceApi.md +75 -0
  32. eval_studio_client/api/docs/OperationServiceApi.md +345 -0
  33. eval_studio_client/api/docs/PerturbationServiceApi.md +78 -0
  34. eval_studio_client/api/docs/PerturbationServiceCreatePerturbationRequest.md +31 -0
  35. eval_studio_client/api/docs/PerturbatorServiceApi.md +138 -0
  36. eval_studio_client/api/docs/ProtobufAny.md +30 -0
  37. eval_studio_client/api/docs/RequiredTheDashboardToUpdate.md +41 -0
  38. eval_studio_client/api/docs/RequiredTheDocumentToUpdate.md +38 -0
  39. eval_studio_client/api/docs/RequiredTheLeaderboardToUpdate.md +54 -0
  40. eval_studio_client/api/docs/RequiredTheModelToUpdate.md +41 -0
  41. eval_studio_client/api/docs/RequiredTheOperationToFinalize.md +39 -0
  42. eval_studio_client/api/docs/RequiredTheOperationToUpdate.md +39 -0
  43. eval_studio_client/api/docs/RequiredTheTestCaseToUpdate.md +39 -0
  44. eval_studio_client/api/docs/RequiredTheTestToUpdate.md +39 -0
  45. eval_studio_client/api/docs/RpcStatus.md +32 -0
  46. eval_studio_client/api/docs/TestCaseServiceApi.md +486 -0
  47. eval_studio_client/api/docs/TestCaseServiceBatchDeleteTestCasesRequest.md +29 -0
  48. eval_studio_client/api/docs/TestClassServiceApi.md +138 -0
  49. eval_studio_client/api/docs/TestLabServiceApi.md +151 -0
  50. eval_studio_client/api/docs/TestServiceApi.md +689 -0
  51. eval_studio_client/api/docs/TestServicePerturbTestRequest.md +31 -0
  52. eval_studio_client/api/docs/V1alphaBatchCreateLeaderboardsRequest.md +31 -0
  53. eval_studio_client/api/docs/V1alphaBatchCreateLeaderboardsResponse.md +29 -0
  54. eval_studio_client/api/docs/V1alphaBatchDeleteDashboardsRequest.md +29 -0
  55. eval_studio_client/api/docs/V1alphaBatchDeleteDashboardsResponse.md +29 -0
  56. eval_studio_client/api/docs/V1alphaBatchDeleteDocumentsRequest.md +29 -0
  57. eval_studio_client/api/docs/V1alphaBatchDeleteDocumentsResponse.md +29 -0
  58. eval_studio_client/api/docs/V1alphaBatchDeleteEvaluatorsRequest.md +29 -0
  59. eval_studio_client/api/docs/V1alphaBatchDeleteEvaluatorsResponse.md +29 -0
  60. eval_studio_client/api/docs/V1alphaBatchDeleteLeaderboardsRequest.md +30 -0
  61. eval_studio_client/api/docs/V1alphaBatchDeleteLeaderboardsResponse.md +29 -0
  62. eval_studio_client/api/docs/V1alphaBatchDeleteModelsRequest.md +29 -0
  63. eval_studio_client/api/docs/V1alphaBatchDeleteModelsResponse.md +29 -0
  64. eval_studio_client/api/docs/V1alphaBatchDeleteTestCasesResponse.md +29 -0
  65. eval_studio_client/api/docs/V1alphaBatchDeleteTestsRequest.md +30 -0
  66. eval_studio_client/api/docs/V1alphaBatchDeleteTestsResponse.md +29 -0
  67. eval_studio_client/api/docs/V1alphaBatchGetDashboardsResponse.md +29 -0
  68. eval_studio_client/api/docs/V1alphaBatchGetDocumentsResponse.md +29 -0
  69. eval_studio_client/api/docs/V1alphaBatchGetLeaderboardsResponse.md +29 -0
  70. eval_studio_client/api/docs/V1alphaBatchGetModelsResponse.md +29 -0
  71. eval_studio_client/api/docs/V1alphaBatchGetOperationsResponse.md +29 -0
  72. eval_studio_client/api/docs/V1alphaBatchGetTestsResponse.md +29 -0
  73. eval_studio_client/api/docs/V1alphaBatchImportLeaderboardRequest.md +37 -0
  74. eval_studio_client/api/docs/V1alphaBatchImportLeaderboardResponse.md +29 -0
  75. eval_studio_client/api/docs/V1alphaBatchImportTestsRequest.md +32 -0
  76. eval_studio_client/api/docs/V1alphaBatchImportTestsResponse.md +29 -0
  77. eval_studio_client/api/docs/V1alphaCheckBaseModelsResponse.md +30 -0
  78. eval_studio_client/api/docs/V1alphaCollectionInfo.md +33 -0
  79. eval_studio_client/api/docs/V1alphaCreateDashboardResponse.md +29 -0
  80. eval_studio_client/api/docs/V1alphaCreateDocumentResponse.md +29 -0
  81. eval_studio_client/api/docs/V1alphaCreateEvaluationRequest.md +37 -0
  82. eval_studio_client/api/docs/V1alphaCreateEvaluatorResponse.md +29 -0
  83. eval_studio_client/api/docs/V1alphaCreateLeaderboardRequest.md +29 -0
  84. eval_studio_client/api/docs/V1alphaCreateLeaderboardResponse.md +29 -0
  85. eval_studio_client/api/docs/V1alphaCreateLeaderboardWithoutCacheResponse.md +29 -0
  86. eval_studio_client/api/docs/V1alphaCreateModelResponse.md +29 -0
  87. eval_studio_client/api/docs/V1alphaCreatePerturbationResponse.md +29 -0
  88. eval_studio_client/api/docs/V1alphaCreateTestCaseResponse.md +29 -0
  89. eval_studio_client/api/docs/V1alphaCreateTestLabResponse.md +29 -0
  90. eval_studio_client/api/docs/V1alphaCreateTestResponse.md +29 -0
  91. eval_studio_client/api/docs/V1alphaDashboard.md +41 -0
  92. eval_studio_client/api/docs/V1alphaDashboardStatus.md +12 -0
  93. eval_studio_client/api/docs/V1alphaDeleteDashboardResponse.md +29 -0
  94. eval_studio_client/api/docs/V1alphaDeleteDocumentResponse.md +29 -0
  95. eval_studio_client/api/docs/V1alphaDeleteEvaluatorResponse.md +29 -0
  96. eval_studio_client/api/docs/V1alphaDeleteLeaderboardResponse.md +29 -0
  97. eval_studio_client/api/docs/V1alphaDeleteModelResponse.md +29 -0
  98. eval_studio_client/api/docs/V1alphaDeleteTestCaseResponse.md +29 -0
  99. eval_studio_client/api/docs/V1alphaDeleteTestResponse.md +29 -0
  100. eval_studio_client/api/docs/V1alphaDocument.md +38 -0
  101. eval_studio_client/api/docs/V1alphaEvaluationTest.md +32 -0
  102. eval_studio_client/api/docs/V1alphaEvaluator.md +45 -0
  103. eval_studio_client/api/docs/V1alphaEvaluatorParamType.md +12 -0
  104. eval_studio_client/api/docs/V1alphaEvaluatorParameter.md +40 -0
  105. eval_studio_client/api/docs/V1alphaEvaluatorView.md +12 -0
  106. eval_studio_client/api/docs/V1alphaFinalizeOperationResponse.md +29 -0
  107. eval_studio_client/api/docs/V1alphaFindAllTestCasesByIDResponse.md +29 -0
  108. eval_studio_client/api/docs/V1alphaFindTestLabResponse.md +29 -0
  109. eval_studio_client/api/docs/V1alphaGetDashboardResponse.md +29 -0
  110. eval_studio_client/api/docs/V1alphaGetDocumentResponse.md +29 -0
  111. eval_studio_client/api/docs/V1alphaGetEvaluatorResponse.md +29 -0
  112. eval_studio_client/api/docs/V1alphaGetInfoResponse.md +29 -0
  113. eval_studio_client/api/docs/V1alphaGetLeaderboardResponse.md +29 -0
  114. eval_studio_client/api/docs/V1alphaGetModelResponse.md +29 -0
  115. eval_studio_client/api/docs/V1alphaGetOperationProgressByParentResponse.md +29 -0
  116. eval_studio_client/api/docs/V1alphaGetOperationResponse.md +29 -0
  117. eval_studio_client/api/docs/V1alphaGetPerturbatorResponse.md +29 -0
  118. eval_studio_client/api/docs/V1alphaGetTestCaseResponse.md +29 -0
  119. eval_studio_client/api/docs/V1alphaGetTestClassResponse.md +29 -0
  120. eval_studio_client/api/docs/V1alphaGetTestResponse.md +29 -0
  121. eval_studio_client/api/docs/V1alphaImportEvaluationRequest.md +33 -0
  122. eval_studio_client/api/docs/V1alphaImportLeaderboardRequest.md +37 -0
  123. eval_studio_client/api/docs/V1alphaImportLeaderboardResponse.md +29 -0
  124. eval_studio_client/api/docs/V1alphaInfo.md +35 -0
  125. eval_studio_client/api/docs/V1alphaInsight.md +40 -0
  126. eval_studio_client/api/docs/V1alphaLeaderboard.md +54 -0
  127. eval_studio_client/api/docs/V1alphaLeaderboardStatus.md +12 -0
  128. eval_studio_client/api/docs/V1alphaLeaderboardType.md +12 -0
  129. eval_studio_client/api/docs/V1alphaLeaderboardView.md +12 -0
  130. eval_studio_client/api/docs/V1alphaListBaseModelsResponse.md +29 -0
  131. eval_studio_client/api/docs/V1alphaListDashboardsResponse.md +29 -0
  132. eval_studio_client/api/docs/V1alphaListDocumentsResponse.md +29 -0
  133. eval_studio_client/api/docs/V1alphaListEvaluatorsResponse.md +29 -0
  134. eval_studio_client/api/docs/V1alphaListLLMModelsResponse.md +29 -0
  135. eval_studio_client/api/docs/V1alphaListLeaderboardsResponse.md +30 -0
  136. eval_studio_client/api/docs/V1alphaListModelCollectionsResponse.md +29 -0
  137. eval_studio_client/api/docs/V1alphaListModelsResponse.md +29 -0
  138. eval_studio_client/api/docs/V1alphaListMostRecentDashboardsResponse.md +29 -0
  139. eval_studio_client/api/docs/V1alphaListMostRecentLeaderboardsResponse.md +29 -0
  140. eval_studio_client/api/docs/V1alphaListMostRecentModelsResponse.md +29 -0
  141. eval_studio_client/api/docs/V1alphaListMostRecentTestsResponse.md +29 -0
  142. eval_studio_client/api/docs/V1alphaListOperationsResponse.md +29 -0
  143. eval_studio_client/api/docs/V1alphaListPerturbatorsResponse.md +29 -0
  144. eval_studio_client/api/docs/V1alphaListRAGCollectionsResponse.md +29 -0
  145. eval_studio_client/api/docs/V1alphaListTestCasesResponse.md +29 -0
  146. eval_studio_client/api/docs/V1alphaListTestClassesResponse.md +29 -0
  147. eval_studio_client/api/docs/V1alphaListTestsResponse.md +29 -0
  148. eval_studio_client/api/docs/V1alphaModel.md +42 -0
  149. eval_studio_client/api/docs/V1alphaModelType.md +12 -0
  150. eval_studio_client/api/docs/V1alphaOperation.md +40 -0
  151. eval_studio_client/api/docs/V1alphaOperationProgress.md +32 -0
  152. eval_studio_client/api/docs/V1alphaPerturbTestResponse.md +29 -0
  153. eval_studio_client/api/docs/V1alphaPerturbator.md +39 -0
  154. eval_studio_client/api/docs/V1alphaPerturbatorConfiguration.md +32 -0
  155. eval_studio_client/api/docs/V1alphaPerturbatorIntensity.md +11 -0
  156. eval_studio_client/api/docs/V1alphaProblemAndAction.md +39 -0
  157. eval_studio_client/api/docs/V1alphaTest.md +40 -0
  158. eval_studio_client/api/docs/V1alphaTestCase.md +40 -0
  159. eval_studio_client/api/docs/V1alphaTestCaseRelationship.md +31 -0
  160. eval_studio_client/api/docs/V1alphaTestClass.md +41 -0
  161. eval_studio_client/api/docs/V1alphaTestClassType.md +12 -0
  162. eval_studio_client/api/docs/V1alphaTestLab.md +41 -0
  163. eval_studio_client/api/docs/V1alphaUpdateDashboardResponse.md +29 -0
  164. eval_studio_client/api/docs/V1alphaUpdateDocumentResponse.md +29 -0
  165. eval_studio_client/api/docs/V1alphaUpdateLeaderboardResponse.md +29 -0
  166. eval_studio_client/api/docs/V1alphaUpdateModelResponse.md +29 -0
  167. eval_studio_client/api/docs/V1alphaUpdateOperationResponse.md +29 -0
  168. eval_studio_client/api/docs/V1alphaUpdateTestCaseResponse.md +29 -0
  169. eval_studio_client/api/docs/V1alphaUpdateTestResponse.md +29 -0
  170. eval_studio_client/api/docs/V1alphaWhoAmIResponse.md +31 -0
  171. eval_studio_client/api/docs/WhoAmIServiceApi.md +72 -0
  172. eval_studio_client/api/exceptions.py +199 -0
  173. eval_studio_client/api/models/__init__.py +148 -0
  174. eval_studio_client/api/models/perturbation_service_create_perturbation_request.py +115 -0
  175. eval_studio_client/api/models/protobuf_any.py +100 -0
  176. eval_studio_client/api/models/required_the_dashboard_to_update.py +127 -0
  177. eval_studio_client/api/models/required_the_document_to_update.py +116 -0
  178. eval_studio_client/api/models/required_the_leaderboard_to_update.py +178 -0
  179. eval_studio_client/api/models/required_the_model_to_update.py +127 -0
  180. eval_studio_client/api/models/required_the_operation_to_finalize.py +129 -0
  181. eval_studio_client/api/models/required_the_operation_to_update.py +129 -0
  182. eval_studio_client/api/models/required_the_test_case_to_update.py +120 -0
  183. eval_studio_client/api/models/required_the_test_to_update.py +122 -0
  184. eval_studio_client/api/models/rpc_status.py +99 -0
  185. eval_studio_client/api/models/test_case_service_batch_delete_test_cases_request.py +87 -0
  186. eval_studio_client/api/models/test_service_perturb_test_request.py +99 -0
  187. eval_studio_client/api/models/v1alpha_batch_create_leaderboards_request.py +99 -0
  188. eval_studio_client/api/models/v1alpha_batch_create_leaderboards_response.py +91 -0
  189. eval_studio_client/api/models/v1alpha_batch_delete_dashboards_request.py +87 -0
  190. eval_studio_client/api/models/v1alpha_batch_delete_dashboards_response.py +95 -0
  191. eval_studio_client/api/models/v1alpha_batch_delete_documents_request.py +87 -0
  192. eval_studio_client/api/models/v1alpha_batch_delete_documents_response.py +95 -0
  193. eval_studio_client/api/models/v1alpha_batch_delete_evaluators_request.py +87 -0
  194. eval_studio_client/api/models/v1alpha_batch_delete_evaluators_response.py +95 -0
  195. eval_studio_client/api/models/v1alpha_batch_delete_leaderboards_request.py +90 -0
  196. eval_studio_client/api/models/v1alpha_batch_delete_leaderboards_response.py +95 -0
  197. eval_studio_client/api/models/v1alpha_batch_delete_models_request.py +87 -0
  198. eval_studio_client/api/models/v1alpha_batch_delete_models_response.py +95 -0
  199. eval_studio_client/api/models/v1alpha_batch_delete_test_cases_response.py +95 -0
  200. eval_studio_client/api/models/v1alpha_batch_delete_tests_request.py +89 -0
  201. eval_studio_client/api/models/v1alpha_batch_delete_tests_response.py +95 -0
  202. eval_studio_client/api/models/v1alpha_batch_get_dashboards_response.py +95 -0
  203. eval_studio_client/api/models/v1alpha_batch_get_documents_response.py +95 -0
  204. eval_studio_client/api/models/v1alpha_batch_get_leaderboards_response.py +95 -0
  205. eval_studio_client/api/models/v1alpha_batch_get_models_response.py +95 -0
  206. eval_studio_client/api/models/v1alpha_batch_get_operations_response.py +95 -0
  207. eval_studio_client/api/models/v1alpha_batch_get_tests_response.py +95 -0
  208. eval_studio_client/api/models/v1alpha_batch_import_leaderboard_request.py +104 -0
  209. eval_studio_client/api/models/v1alpha_batch_import_leaderboard_response.py +91 -0
  210. eval_studio_client/api/models/v1alpha_batch_import_tests_request.py +93 -0
  211. eval_studio_client/api/models/v1alpha_batch_import_tests_response.py +95 -0
  212. eval_studio_client/api/models/v1alpha_check_base_models_response.py +89 -0
  213. eval_studio_client/api/models/v1alpha_collection_info.py +93 -0
  214. eval_studio_client/api/models/v1alpha_create_dashboard_response.py +91 -0
  215. eval_studio_client/api/models/v1alpha_create_document_response.py +91 -0
  216. eval_studio_client/api/models/v1alpha_create_evaluation_request.py +115 -0
  217. eval_studio_client/api/models/v1alpha_create_evaluator_response.py +91 -0
  218. eval_studio_client/api/models/v1alpha_create_leaderboard_request.py +91 -0
  219. eval_studio_client/api/models/v1alpha_create_leaderboard_response.py +91 -0
  220. eval_studio_client/api/models/v1alpha_create_leaderboard_without_cache_response.py +91 -0
  221. eval_studio_client/api/models/v1alpha_create_model_response.py +91 -0
  222. eval_studio_client/api/models/v1alpha_create_perturbation_response.py +87 -0
  223. eval_studio_client/api/models/v1alpha_create_test_case_response.py +91 -0
  224. eval_studio_client/api/models/v1alpha_create_test_lab_response.py +91 -0
  225. eval_studio_client/api/models/v1alpha_create_test_response.py +91 -0
  226. eval_studio_client/api/models/v1alpha_dashboard.py +131 -0
  227. eval_studio_client/api/models/v1alpha_dashboard_status.py +39 -0
  228. eval_studio_client/api/models/v1alpha_delete_dashboard_response.py +91 -0
  229. eval_studio_client/api/models/v1alpha_delete_document_response.py +91 -0
  230. eval_studio_client/api/models/v1alpha_delete_evaluator_response.py +91 -0
  231. eval_studio_client/api/models/v1alpha_delete_leaderboard_response.py +91 -0
  232. eval_studio_client/api/models/v1alpha_delete_model_response.py +91 -0
  233. eval_studio_client/api/models/v1alpha_delete_test_case_response.py +91 -0
  234. eval_studio_client/api/models/v1alpha_delete_test_response.py +91 -0
  235. eval_studio_client/api/models/v1alpha_document.py +120 -0
  236. eval_studio_client/api/models/v1alpha_evaluation_test.py +107 -0
  237. eval_studio_client/api/models/v1alpha_evaluator.py +155 -0
  238. eval_studio_client/api/models/v1alpha_evaluator_param_type.py +42 -0
  239. eval_studio_client/api/models/v1alpha_evaluator_parameter.py +126 -0
  240. eval_studio_client/api/models/v1alpha_evaluator_view.py +38 -0
  241. eval_studio_client/api/models/v1alpha_finalize_operation_response.py +91 -0
  242. eval_studio_client/api/models/v1alpha_find_all_test_cases_by_id_response.py +95 -0
  243. eval_studio_client/api/models/v1alpha_find_test_lab_response.py +91 -0
  244. eval_studio_client/api/models/v1alpha_get_dashboard_response.py +91 -0
  245. eval_studio_client/api/models/v1alpha_get_document_response.py +91 -0
  246. eval_studio_client/api/models/v1alpha_get_evaluator_response.py +91 -0
  247. eval_studio_client/api/models/v1alpha_get_info_response.py +91 -0
  248. eval_studio_client/api/models/v1alpha_get_leaderboard_response.py +91 -0
  249. eval_studio_client/api/models/v1alpha_get_model_response.py +91 -0
  250. eval_studio_client/api/models/v1alpha_get_operation_progress_by_parent_response.py +91 -0
  251. eval_studio_client/api/models/v1alpha_get_operation_response.py +91 -0
  252. eval_studio_client/api/models/v1alpha_get_perturbator_response.py +91 -0
  253. eval_studio_client/api/models/v1alpha_get_test_case_response.py +91 -0
  254. eval_studio_client/api/models/v1alpha_get_test_class_response.py +91 -0
  255. eval_studio_client/api/models/v1alpha_get_test_response.py +91 -0
  256. eval_studio_client/api/models/v1alpha_import_evaluation_request.py +99 -0
  257. eval_studio_client/api/models/v1alpha_import_leaderboard_request.py +104 -0
  258. eval_studio_client/api/models/v1alpha_import_leaderboard_response.py +91 -0
  259. eval_studio_client/api/models/v1alpha_info.py +99 -0
  260. eval_studio_client/api/models/v1alpha_insight.py +107 -0
  261. eval_studio_client/api/models/v1alpha_leaderboard.py +182 -0
  262. eval_studio_client/api/models/v1alpha_leaderboard_status.py +39 -0
  263. eval_studio_client/api/models/v1alpha_leaderboard_type.py +39 -0
  264. eval_studio_client/api/models/v1alpha_leaderboard_view.py +39 -0
  265. eval_studio_client/api/models/v1alpha_list_base_models_response.py +87 -0
  266. eval_studio_client/api/models/v1alpha_list_dashboards_response.py +95 -0
  267. eval_studio_client/api/models/v1alpha_list_documents_response.py +95 -0
  268. eval_studio_client/api/models/v1alpha_list_evaluators_response.py +95 -0
  269. eval_studio_client/api/models/v1alpha_list_leaderboards_response.py +97 -0
  270. eval_studio_client/api/models/v1alpha_list_llm_models_response.py +87 -0
  271. eval_studio_client/api/models/v1alpha_list_model_collections_response.py +95 -0
  272. eval_studio_client/api/models/v1alpha_list_models_response.py +95 -0
  273. eval_studio_client/api/models/v1alpha_list_most_recent_dashboards_response.py +95 -0
  274. eval_studio_client/api/models/v1alpha_list_most_recent_leaderboards_response.py +95 -0
  275. eval_studio_client/api/models/v1alpha_list_most_recent_models_response.py +95 -0
  276. eval_studio_client/api/models/v1alpha_list_most_recent_tests_response.py +95 -0
  277. eval_studio_client/api/models/v1alpha_list_operations_response.py +95 -0
  278. eval_studio_client/api/models/v1alpha_list_perturbators_response.py +95 -0
  279. eval_studio_client/api/models/v1alpha_list_rag_collections_response.py +95 -0
  280. eval_studio_client/api/models/v1alpha_list_test_cases_response.py +95 -0
  281. eval_studio_client/api/models/v1alpha_list_test_classes_response.py +95 -0
  282. eval_studio_client/api/models/v1alpha_list_tests_response.py +95 -0
  283. eval_studio_client/api/models/v1alpha_model.py +131 -0
  284. eval_studio_client/api/models/v1alpha_model_type.py +46 -0
  285. eval_studio_client/api/models/v1alpha_operation.py +133 -0
  286. eval_studio_client/api/models/v1alpha_operation_progress.py +99 -0
  287. eval_studio_client/api/models/v1alpha_perturb_test_response.py +91 -0
  288. eval_studio_client/api/models/v1alpha_perturbator.py +122 -0
  289. eval_studio_client/api/models/v1alpha_perturbator_configuration.py +92 -0
  290. eval_studio_client/api/models/v1alpha_perturbator_intensity.py +39 -0
  291. eval_studio_client/api/models/v1alpha_problem_and_action.py +129 -0
  292. eval_studio_client/api/models/v1alpha_test.py +126 -0
  293. eval_studio_client/api/models/v1alpha_test_case.py +124 -0
  294. eval_studio_client/api/models/v1alpha_test_case_relationship.py +91 -0
  295. eval_studio_client/api/models/v1alpha_test_class.py +127 -0
  296. eval_studio_client/api/models/v1alpha_test_class_type.py +42 -0
  297. eval_studio_client/api/models/v1alpha_test_lab.py +137 -0
  298. eval_studio_client/api/models/v1alpha_update_dashboard_response.py +91 -0
  299. eval_studio_client/api/models/v1alpha_update_document_response.py +91 -0
  300. eval_studio_client/api/models/v1alpha_update_leaderboard_response.py +91 -0
  301. eval_studio_client/api/models/v1alpha_update_model_response.py +91 -0
  302. eval_studio_client/api/models/v1alpha_update_operation_response.py +91 -0
  303. eval_studio_client/api/models/v1alpha_update_test_case_response.py +91 -0
  304. eval_studio_client/api/models/v1alpha_update_test_response.py +91 -0
  305. eval_studio_client/api/models/v1alpha_who_am_i_response.py +91 -0
  306. eval_studio_client/api/rest.py +257 -0
  307. eval_studio_client/api/test/__init__.py +0 -0
  308. eval_studio_client/api/test/test_dashboard_service_api.py +79 -0
  309. eval_studio_client/api/test/test_document_service_api.py +73 -0
  310. eval_studio_client/api/test/test_evaluation_service_api.py +55 -0
  311. eval_studio_client/api/test/test_evaluator_service_api.py +61 -0
  312. eval_studio_client/api/test/test_info_service_api.py +37 -0
  313. eval_studio_client/api/test/test_leaderboard_service_api.py +103 -0
  314. eval_studio_client/api/test/test_model_service_api.py +97 -0
  315. eval_studio_client/api/test/test_operation_progress_service_api.py +37 -0
  316. eval_studio_client/api/test/test_operation_service_api.py +61 -0
  317. eval_studio_client/api/test/test_perturbation_service_api.py +37 -0
  318. eval_studio_client/api/test/test_perturbation_service_create_perturbation_request.py +79 -0
  319. eval_studio_client/api/test/test_perturbator_service_api.py +43 -0
  320. eval_studio_client/api/test/test_protobuf_any.py +51 -0
  321. eval_studio_client/api/test/test_required_the_dashboard_to_update.py +64 -0
  322. eval_studio_client/api/test/test_required_the_document_to_update.py +59 -0
  323. eval_studio_client/api/test/test_required_the_leaderboard_to_update.py +115 -0
  324. eval_studio_client/api/test/test_required_the_model_to_update.py +63 -0
  325. eval_studio_client/api/test/test_required_the_operation_to_finalize.py +71 -0
  326. eval_studio_client/api/test/test_required_the_operation_to_update.py +71 -0
  327. eval_studio_client/api/test/test_required_the_test_case_to_update.py +63 -0
  328. eval_studio_client/api/test/test_required_the_test_to_update.py +65 -0
  329. eval_studio_client/api/test/test_rpc_status.py +57 -0
  330. eval_studio_client/api/test/test_test_case_service_api.py +73 -0
  331. eval_studio_client/api/test/test_test_case_service_batch_delete_test_cases_request.py +53 -0
  332. eval_studio_client/api/test/test_test_class_service_api.py +43 -0
  333. eval_studio_client/api/test/test_test_lab_service_api.py +43 -0
  334. eval_studio_client/api/test/test_test_service_api.py +91 -0
  335. eval_studio_client/api/test/test_test_service_perturb_test_request.py +58 -0
  336. eval_studio_client/api/test/test_v1alpha_batch_create_leaderboards_request.py +119 -0
  337. eval_studio_client/api/test/test_v1alpha_batch_create_leaderboards_response.py +71 -0
  338. eval_studio_client/api/test/test_v1alpha_batch_delete_dashboards_request.py +53 -0
  339. eval_studio_client/api/test/test_v1alpha_batch_delete_dashboards_response.py +68 -0
  340. eval_studio_client/api/test/test_v1alpha_batch_delete_documents_request.py +53 -0
  341. eval_studio_client/api/test/test_v1alpha_batch_delete_documents_response.py +63 -0
  342. eval_studio_client/api/test/test_v1alpha_batch_delete_evaluators_request.py +53 -0
  343. eval_studio_client/api/test/test_v1alpha_batch_delete_evaluators_response.py +91 -0
  344. eval_studio_client/api/test/test_v1alpha_batch_delete_leaderboards_request.py +54 -0
  345. eval_studio_client/api/test/test_v1alpha_batch_delete_leaderboards_response.py +116 -0
  346. eval_studio_client/api/test/test_v1alpha_batch_delete_models_request.py +53 -0
  347. eval_studio_client/api/test/test_v1alpha_batch_delete_models_response.py +67 -0
  348. eval_studio_client/api/test/test_v1alpha_batch_delete_test_cases_response.py +67 -0
  349. eval_studio_client/api/test/test_v1alpha_batch_delete_tests_request.py +54 -0
  350. eval_studio_client/api/test/test_v1alpha_batch_delete_tests_response.py +69 -0
  351. eval_studio_client/api/test/test_v1alpha_batch_get_dashboards_response.py +68 -0
  352. eval_studio_client/api/test/test_v1alpha_batch_get_documents_response.py +63 -0
  353. eval_studio_client/api/test/test_v1alpha_batch_get_leaderboards_response.py +116 -0
  354. eval_studio_client/api/test/test_v1alpha_batch_get_models_response.py +67 -0
  355. eval_studio_client/api/test/test_v1alpha_batch_get_operations_response.py +73 -0
  356. eval_studio_client/api/test/test_v1alpha_batch_get_tests_response.py +69 -0
  357. eval_studio_client/api/test/test_v1alpha_batch_import_leaderboard_request.py +61 -0
  358. eval_studio_client/api/test/test_v1alpha_batch_import_leaderboard_response.py +71 -0
  359. eval_studio_client/api/test/test_v1alpha_batch_import_tests_request.py +54 -0
  360. eval_studio_client/api/test/test_v1alpha_batch_import_tests_response.py +69 -0
  361. eval_studio_client/api/test/test_v1alpha_check_base_models_response.py +52 -0
  362. eval_studio_client/api/test/test_v1alpha_collection_info.py +54 -0
  363. eval_studio_client/api/test/test_v1alpha_create_dashboard_response.py +66 -0
  364. eval_studio_client/api/test/test_v1alpha_create_document_response.py +61 -0
  365. eval_studio_client/api/test/test_v1alpha_create_evaluation_request.py +107 -0
  366. eval_studio_client/api/test/test_v1alpha_create_evaluator_response.py +89 -0
  367. eval_studio_client/api/test/test_v1alpha_create_leaderboard_request.py +114 -0
  368. eval_studio_client/api/test/test_v1alpha_create_leaderboard_response.py +71 -0
  369. eval_studio_client/api/test/test_v1alpha_create_leaderboard_without_cache_response.py +71 -0
  370. eval_studio_client/api/test/test_v1alpha_create_model_response.py +65 -0
  371. eval_studio_client/api/test/test_v1alpha_create_perturbation_response.py +51 -0
  372. eval_studio_client/api/test/test_v1alpha_create_test_case_response.py +65 -0
  373. eval_studio_client/api/test/test_v1alpha_create_test_lab_response.py +68 -0
  374. eval_studio_client/api/test/test_v1alpha_create_test_response.py +67 -0
  375. eval_studio_client/api/test/test_v1alpha_dashboard.py +65 -0
  376. eval_studio_client/api/test/test_v1alpha_dashboard_status.py +33 -0
  377. eval_studio_client/api/test/test_v1alpha_delete_dashboard_response.py +66 -0
  378. eval_studio_client/api/test/test_v1alpha_delete_document_response.py +61 -0
  379. eval_studio_client/api/test/test_v1alpha_delete_evaluator_response.py +89 -0
  380. eval_studio_client/api/test/test_v1alpha_delete_leaderboard_response.py +114 -0
  381. eval_studio_client/api/test/test_v1alpha_delete_model_response.py +65 -0
  382. eval_studio_client/api/test/test_v1alpha_delete_test_case_response.py +65 -0
  383. eval_studio_client/api/test/test_v1alpha_delete_test_response.py +67 -0
  384. eval_studio_client/api/test/test_v1alpha_document.py +60 -0
  385. eval_studio_client/api/test/test_v1alpha_evaluation_test.py +76 -0
  386. eval_studio_client/api/test/test_v1alpha_evaluator.py +91 -0
  387. eval_studio_client/api/test/test_v1alpha_evaluator_param_type.py +33 -0
  388. eval_studio_client/api/test/test_v1alpha_evaluator_parameter.py +68 -0
  389. eval_studio_client/api/test/test_v1alpha_evaluator_view.py +33 -0
  390. eval_studio_client/api/test/test_v1alpha_finalize_operation_response.py +71 -0
  391. eval_studio_client/api/test/test_v1alpha_find_all_test_cases_by_id_response.py +67 -0
  392. eval_studio_client/api/test/test_v1alpha_find_test_lab_response.py +68 -0
  393. eval_studio_client/api/test/test_v1alpha_get_dashboard_response.py +66 -0
  394. eval_studio_client/api/test/test_v1alpha_get_document_response.py +61 -0
  395. eval_studio_client/api/test/test_v1alpha_get_evaluator_response.py +89 -0
  396. eval_studio_client/api/test/test_v1alpha_get_info_response.py +60 -0
  397. eval_studio_client/api/test/test_v1alpha_get_leaderboard_response.py +114 -0
  398. eval_studio_client/api/test/test_v1alpha_get_model_response.py +65 -0
  399. eval_studio_client/api/test/test_v1alpha_get_operation_progress_by_parent_response.py +55 -0
  400. eval_studio_client/api/test/test_v1alpha_get_operation_response.py +71 -0
  401. eval_studio_client/api/test/test_v1alpha_get_perturbator_response.py +64 -0
  402. eval_studio_client/api/test/test_v1alpha_get_test_case_response.py +65 -0
  403. eval_studio_client/api/test/test_v1alpha_get_test_class_response.py +70 -0
  404. eval_studio_client/api/test/test_v1alpha_get_test_response.py +67 -0
  405. eval_studio_client/api/test/test_v1alpha_import_evaluation_request.py +73 -0
  406. eval_studio_client/api/test/test_v1alpha_import_leaderboard_request.py +59 -0
  407. eval_studio_client/api/test/test_v1alpha_import_leaderboard_response.py +71 -0
  408. eval_studio_client/api/test/test_v1alpha_info.py +59 -0
  409. eval_studio_client/api/test/test_v1alpha_insight.py +67 -0
  410. eval_studio_client/api/test/test_v1alpha_leaderboard.py +116 -0
  411. eval_studio_client/api/test/test_v1alpha_leaderboard_status.py +33 -0
  412. eval_studio_client/api/test/test_v1alpha_leaderboard_type.py +33 -0
  413. eval_studio_client/api/test/test_v1alpha_leaderboard_view.py +33 -0
  414. eval_studio_client/api/test/test_v1alpha_list_base_models_response.py +53 -0
  415. eval_studio_client/api/test/test_v1alpha_list_dashboards_response.py +68 -0
  416. eval_studio_client/api/test/test_v1alpha_list_documents_response.py +63 -0
  417. eval_studio_client/api/test/test_v1alpha_list_evaluators_response.py +91 -0
  418. eval_studio_client/api/test/test_v1alpha_list_leaderboards_response.py +117 -0
  419. eval_studio_client/api/test/test_v1alpha_list_llm_models_response.py +53 -0
  420. eval_studio_client/api/test/test_v1alpha_list_model_collections_response.py +57 -0
  421. eval_studio_client/api/test/test_v1alpha_list_models_response.py +67 -0
  422. eval_studio_client/api/test/test_v1alpha_list_most_recent_dashboards_response.py +68 -0
  423. eval_studio_client/api/test/test_v1alpha_list_most_recent_leaderboards_response.py +116 -0
  424. eval_studio_client/api/test/test_v1alpha_list_most_recent_models_response.py +67 -0
  425. eval_studio_client/api/test/test_v1alpha_list_most_recent_tests_response.py +69 -0
  426. eval_studio_client/api/test/test_v1alpha_list_operations_response.py +73 -0
  427. eval_studio_client/api/test/test_v1alpha_list_perturbators_response.py +66 -0
  428. eval_studio_client/api/test/test_v1alpha_list_rag_collections_response.py +57 -0
  429. eval_studio_client/api/test/test_v1alpha_list_test_cases_response.py +67 -0
  430. eval_studio_client/api/test/test_v1alpha_list_test_classes_response.py +72 -0
  431. eval_studio_client/api/test/test_v1alpha_list_tests_response.py +69 -0
  432. eval_studio_client/api/test/test_v1alpha_model.py +64 -0
  433. eval_studio_client/api/test/test_v1alpha_model_type.py +33 -0
  434. eval_studio_client/api/test/test_v1alpha_operation.py +72 -0
  435. eval_studio_client/api/test/test_v1alpha_operation_progress.py +54 -0
  436. eval_studio_client/api/test/test_v1alpha_perturb_test_response.py +67 -0
  437. eval_studio_client/api/test/test_v1alpha_perturbator.py +63 -0
  438. eval_studio_client/api/test/test_v1alpha_perturbator_configuration.py +53 -0
  439. eval_studio_client/api/test/test_v1alpha_perturbator_intensity.py +33 -0
  440. eval_studio_client/api/test/test_v1alpha_problem_and_action.py +65 -0
  441. eval_studio_client/api/test/test_v1alpha_test.py +66 -0
  442. eval_studio_client/api/test/test_v1alpha_test_case.py +64 -0
  443. eval_studio_client/api/test/test_v1alpha_test_case_relationship.py +53 -0
  444. eval_studio_client/api/test/test_v1alpha_test_class.py +69 -0
  445. eval_studio_client/api/test/test_v1alpha_test_class_type.py +33 -0
  446. eval_studio_client/api/test/test_v1alpha_test_lab.py +67 -0
  447. eval_studio_client/api/test/test_v1alpha_update_dashboard_response.py +66 -0
  448. eval_studio_client/api/test/test_v1alpha_update_document_response.py +61 -0
  449. eval_studio_client/api/test/test_v1alpha_update_leaderboard_response.py +114 -0
  450. eval_studio_client/api/test/test_v1alpha_update_model_response.py +65 -0
  451. eval_studio_client/api/test/test_v1alpha_update_operation_response.py +71 -0
  452. eval_studio_client/api/test/test_v1alpha_update_test_case_response.py +65 -0
  453. eval_studio_client/api/test/test_v1alpha_update_test_response.py +67 -0
  454. eval_studio_client/api/test/test_v1alpha_who_am_i_response.py +53 -0
  455. eval_studio_client/api/test/test_who_am_i_service_api.py +38 -0
  456. eval_studio_client/client.py +98 -0
  457. eval_studio_client/dashboards.py +187 -0
  458. eval_studio_client/documents.py +95 -0
  459. eval_studio_client/evaluators.py +65 -0
  460. eval_studio_client/gen/openapiv2/eval_studio.swagger.json +6043 -0
  461. eval_studio_client/insights.py +35 -0
  462. eval_studio_client/leaderboards.py +207 -0
  463. eval_studio_client/models.py +522 -0
  464. eval_studio_client/perturbators.py +101 -0
  465. eval_studio_client/problems.py +50 -0
  466. eval_studio_client/test_labs.py +319 -0
  467. eval_studio_client/tests.py +369 -0
  468. eval_studio_client-0.7.0.dist-info/METADATA +18 -0
  469. eval_studio_client-0.7.0.dist-info/RECORD +470 -0
  470. eval_studio_client-0.7.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,319 @@
1
+ import dataclasses
2
+ import enum
3
+ import json
4
+ from typing import List
5
+ from typing import Optional
6
+ from typing import Union
7
+ import uuid
8
+
9
+ from eval_studio_client import api
10
+ from eval_studio_client import evaluators
11
+ from eval_studio_client import leaderboards as l10s
12
+ from eval_studio_client.api import models as apiModels
13
+
14
+
15
class ModelType(enum.Enum):
    """Closed set of model/RAG-system connection types supported by a Test Lab.

    The enum value is the wire identifier sent to the Eval Studio backend.
    """

    h2ogpte = "h2ogpte"  # h2oGPTe RAG
    h2ogpte_llm = "h2ogpte_llm"  # h2oGPTe-hosted LLM
    h2ogpt = "h2ogpt"  # h2oGPT-hosted LLM
    h2ollmops = "h2ollmops"  # H2O LLMOps-hosted LLM
    openai_rag = "openai_rag"  # OpenAI RAG
    openai_llm = "openai_llm"  # OpenAI-hosted LLM
    azure_openai_llm = "azure_openai_llm"  # MS Azure hosted OpenAI LLM
    amazon_bedrock = "amazon_bedrock"  # Amazon Bedrock
24
+
25
+
26
@dataclasses.dataclass
class TestLab:
    """Represents an Eval Studio Test Lab, which can directly be evaluated,
    without a need to contact the LLM/RAG system. This object contains all the
    information needed for the evaluation, such as prompt, actual answer and
    retrieved contexts, for all of the models.

    Attributes:
        name (str): The name of the test lab.
        description (str): The description of the test lab.
        models: Definitions of connections to the different models or RAG
            systems; each model carries its own evaluation inputs.
    """

    name: str
    description: str = ""
    _models: List["TestLabModel"] = dataclasses.field(default_factory=list)
    # Optional API client; server-side operations (evaluate) require it.
    _client: Optional[api.ApiClient] = None

    # Prevent pytest from collecting this class as a test.
    __test__ = False

    def __post_init__(self):
        # The leaderboard API stub is only available when a client is bound.
        if self._client:
            self._leaderboard_api = api.LeaderboardServiceApi(self._client)

    @property
    def models(self) -> List["TestLabModel"]:
        """Models registered in this test lab."""
        return self._models

    def add_model(
        self,
        name: str,
        model_type: ModelType,
        llm_model_name: str,
        collection_id: str = "",
        collection_name: str = "",
        documents: Optional[List[str]] = None,
    ) -> "TestLabModel":
        """Registers a new model to the Test Lab.

        Args:
            name (str): Human readable name of the model.
            model_type (ModelType): The type of the model. One of `ModelType` values.
            llm_model_name (str): Identification of the LLM models used,
                e.g. "h2oai/h2ogpt-4096-llama2-13b-chat"
            collection_id (str, optional): ID of the existing collection in the RAG
                system, which produced the answers.
            collection_name (str, optional): Name of the existing collection in the RAG
                system, which produced the answers.
            documents (Optional[List[str]], optional): List of document URLs used
                in the RAG evaluation. These can later be reused.

        Returns:
            TestLabModel: New instance of TestLabModel.
        """
        # Generated key links the model definition with its inputs in the JSON lab.
        key = str(uuid.uuid4())
        _m = TestLabModel(
            name=name,
            key=key,
            model_type=model_type.value,
            llm_model_name=llm_model_name,
            collection_id=collection_id,
            collection_name=collection_name,
            documents=documents or [],
        )
        self._models.append(_m)
        return _m

    def evaluate(self, evaluator: evaluators.Evaluator) -> Optional[l10s.Leaderboard]:
        """Runs an evaluation for the test lab.

        Args:
            evaluator: The evaluator to use for the evaluation.

        Returns:
            The resulting Leaderboard, or None if it could not be retrieved.

        Raises:
            RuntimeError: If the test lab is not associated with an API client.
        """
        # Without a client there is no leaderboard API stub; fail with a clear
        # error instead of an AttributeError (consistent with Test.perturb).
        if self._client is None:
            raise RuntimeError("Client is not set.")

        req = apiModels.V1alphaImportLeaderboardRequest(
            testLabJson=self.json(),
            evaluator=evaluator.key,
            model=None,
            leaderboardDisplayName=self.name,
            leaderboardDescription=self.description or "",
            testDisplayName=f"{self.name}-Test",
            testDescription=self.description or "",
        )
        res = self._leaderboard_api.leaderboard_service_import_leaderboard(req)
        if res and res.operation:
            return self._get_leaderboard_from_operation(res.operation)

        return None

    def json(self) -> str:
        """Serializes the whole test lab (models and their inputs) to JSON."""
        raw_inputs = []
        dataset = []
        for m in self.models:
            raw_inputs.extend(m.raw_inputs)
            dataset.extend(m.dataset)

        lab = {
            "name": self.name,
            "description": self.description,
            "raw_dataset": {"inputs": raw_inputs},
            "dataset": {"inputs": dataset},
            "models": [m.to_dict() for m in self.models],
            "llm_model_names": self._llm_model_names(),
        }

        return json.dumps(lab, indent=4, sort_keys=True)

    def _get_leaderboard_from_operation(
        self, operation: apiModels.V1alphaOperation
    ) -> Optional[l10s.Leaderboard]:
        """Retrieves the leaderboard from the operation, which created it.

        Args:
            operation: The operation that created the leaderboard.

        Raises:
            RuntimeError: If the operation carries no metadata to resolve
                the leaderboard from.
        """
        if not operation.metadata:
            raise RuntimeError("Not possible to retrieve leaderboard from operation")

        leaderboard_id = operation.metadata.to_dict().get("leaderboard")
        res = self._leaderboard_api.leaderboard_service_get_leaderboard(leaderboard_id)
        if res and res.leaderboard:
            return l10s.Leaderboard._from_api_leaderboard(res.leaderboard, self._client)

        return None

    def _llm_model_names(self) -> List[str]:
        """LLM identifiers of all registered models, in registration order."""
        return [m.llm_model_name for m in self.models]
154
+
155
+
156
@dataclasses.dataclass
class TestLabModel:
    """Represents a model, which is used in the testing. This object contains
    the model key, the model name and the model type.
    """

    # Human readable name of the model
    name: str
    # The unique identification of the model to link with inputs
    key: str
    # One of the `ModelType` values; validated in `__post_init__`.
    model_type: str
    # Identification of the LLM used, e.g. "h2oai/h2ogpt-4096-llama2-13b-chat".
    llm_model_name: str
    collection_id: str = ""
    collection_name: str = ""
    documents: List[str] = dataclasses.field(default_factory=list)
    connection: str = ""
    _inputs: List["_TestLabInput"] = dataclasses.field(default_factory=list)

    # Prevent pytest from collecting this class as a test.
    __test__ = False

    def __post_init__(self):
        self.validate_model_type()

    @property
    def raw_inputs(self) -> List[dict]:
        """Inputs serialized without model outputs or retrieved contexts."""
        return [i.to_raw_input_dict() for i in self._inputs]

    @property
    def dataset(self) -> List[dict]:
        """Inputs serialized with the full evaluation data (outputs, contexts)."""
        return [i.to_dataset_dict() for i in self._inputs]

    def add_input(
        self,
        prompt: str,
        actual_output: str,
        corpus: Optional[List[str]] = None,
        context: Optional[List[str]] = None,
        categories: Union[str, List[str]] = "",
        expected_output: str = "",
        output_constraints: Optional[List[str]] = None,
        actual_duration: float = 0.0,
        cost: float = 0.0,
        output_condition: str = "",
    ) -> "_TestLabInput":
        """Add an evaluation input, which contains all the info relevant for the
        evaluation, to avoid calling the RAG/LLM itself.

        Args:
            prompt (str): Prompt or input to the RAG/LLM.
            actual_output (str): Actual output from the RAG/LLM.
            corpus (Optional[List[str]], optional): List of document URLs used in the RAG.
            context (Optional[List[str]], optional): List of retrieved contexts.
            categories (Union[str, List[str]]): List of categories/tags for the input.
            expected_output (str): Expected output from the RAG/LLM.
            output_constraints (List[str]): List of constraints for the output,
                such as expected tokens in the answer.
            actual_duration (float, optional): Duration of the inference of the answer.
            cost (float, optional): Cost estimate of the inference.
            output_condition (str, optional): Output condition is a logical expression
                used to set the expectation on the output. The expression is in
                Google's filtering language format defined in
                https://google.aip.dev/160#logical-operators .

        Returns:
            TestLabInput instance.
        """
        i = _TestLabInput(
            prompt=prompt,
            corpus=corpus,
            context=context,
            categories=categories,
            expected_output=expected_output,
            output_constraints=output_constraints,
            output_condition=output_condition,
            actual_output=actual_output,
            actual_duration=actual_duration,
            cost=cost,
            model_key=self.key,
        )
        self._inputs.append(i)
        return i

    def to_dict(self) -> dict:
        """Serializes the model definition (without its inputs) to a plain dict."""
        return {
            "name": self.name,
            "key": self.key,
            "model_type": self.model_type,
            "collection_id": self.collection_id,
            "collection_name": self.collection_name,
            "llm_model_name": self.llm_model_name,
            "documents": self.documents or [],
            "connection": self.connection,
        }

    def validate_model_type(self):
        """Validates that `model_type` is one of the supported `ModelType` values.

        Raises:
            ValueError: If `model_type` is not a known model type.
        """
        # Iterate the enum directly; wrapping it in set() was redundant and made
        # the error-message ordering nondeterministic.
        valid_values = [e.value for e in ModelType]
        if self.model_type not in valid_values:
            raise ValueError(
                f"Invalid model type: {self.model_type}. Valid values: {valid_values}"
            )
256
+
257
+
258
+ @dataclasses.dataclass
259
+ class _TestLabInput:
260
+ """Represents a single input for the testing, which is basically a `TestCase`,
261
+ with more information.
262
+ """
263
+
264
+ # The input prompt
265
+ prompt: str
266
+ corpus: Optional[List[str]] = None
267
+ context: Optional[List[str]] = None
268
+ categories: Union[str, List[str]] = ""
269
+ expected_output: str = ""
270
+ output_constraints: Optional[List[str]] = None
271
+ output_condition: str = ""
272
+ actual_output: str = ""
273
+ actual_duration: float = 0.0
274
+ cost: float = 0.0
275
+ model_key: str = ""
276
+
277
+ def to_raw_input_dict(self) -> dict:
278
+ return {
279
+ "input": self.prompt,
280
+ "corpus": self.corpus or [],
281
+ "context": [],
282
+ "categories": self.categories,
283
+ "expected_output": self.expected_output,
284
+ "output_constraints": self.output_constraints or [],
285
+ "output_condition": self.output_condition or "",
286
+ "actual_output": "",
287
+ "actual_duration": 0.0,
288
+ "cost": 0.0,
289
+ "model_key": self.model_key,
290
+ }
291
+
292
+ def to_dataset_dict(self) -> dict:
293
+ return {
294
+ "input": self.prompt,
295
+ "corpus": self.corpus or [],
296
+ "context": self.context or [],
297
+ "categories": self.categories,
298
+ "expected_output": self.expected_output,
299
+ "output_constraints": self.output_constraints or [],
300
+ "output_condition": self.output_condition or "",
301
+ "actual_output": self.actual_output,
302
+ "actual_duration": self.actual_duration,
303
+ "cost": self.cost,
304
+ "model_key": self.model_key,
305
+ }
306
+
307
+
308
class _TestLabs:
    """Factory for `TestLab` objects bound to a shared API client."""

    def __init__(self, client: api.ApiClient):
        self._client = client

    def create(self, name: str, description: str = "") -> TestLab:
        """Create a new Test Lab instance.

        Args:
            name: Name of the test lab.
            description: Description of the test lab.
        """
        return TestLab(name=name, description=description, _client=self._client)
@@ -0,0 +1,369 @@
1
+ import dataclasses
2
+ import datetime
3
+ import json
4
+ from typing import List
5
+ from typing import Optional
6
+ from typing import Union
7
+
8
+ from eval_studio_client import api
9
+ from eval_studio_client import documents as d7s
10
+ from eval_studio_client import perturbators as p10s
11
+ from eval_studio_client.api import models
12
+
13
+
14
@dataclasses.dataclass
class TestCase:
    """Represents a single test case, which contains tested prompt, expected answer
    and set of constraints.

    Attributes:
        key (str): Generated ID of the test case.
        prompt (str): Prompt of the test case.
        answer (str): Expected answer of the test case.
        constraints (List[str]): String tokens expected in the actual answer.
            Note: all of the constraints in the list are concatenated using AND
            operator, which means actual answer need to contain all of the tokens.
        condition (str): Logical expression constraining the actual answer.
        create_time (datetime): Timestamp of the test case creation.
        update_time (datetime): Timestamp of the last test case update.
    """

    key: str
    prompt: str
    answer: str
    constraints: List[str]
    condition: str
    create_time: Optional[datetime.datetime] = None
    update_time: Optional[datetime.datetime] = None

    def to_api_proto(self) -> models.V1alphaTestCase:
        """Converts the client TestCase to an API TestCase."""
        payload = {
            "prompt": self.prompt,
            "answer": self.answer,
            "constraints": self.constraints,
            "condition": self.condition,
        }
        return models.V1alphaTestCase(**payload)

    @staticmethod
    def _from_api_test_case(api_test_case: models.V1alphaTestCase) -> "TestCase":
        """Builds a client TestCase from its API representation.

        Missing API fields are normalized to empty strings / lists.
        """
        return TestCase(
            key=api_test_case.name or "",
            prompt=api_test_case.prompt or "",
            answer=api_test_case.answer or "",
            constraints=api_test_case.constraints or [],
            condition=api_test_case.condition or "",
        )
56
+
57
+
58
@dataclasses.dataclass
class Test:
    """Represents a test, which contains a set of test cases and optionally
    also documents for evaluating RAG systems.

    Attributes:
        key (str): Generated ID of the test.
        name (str): Name of the test.
        description (str): Description of the test.
        create_time (datetime): Timestamp of the test creation.
        update_time (datetime): Timestamp of the last test update.
    """

    key: str
    name: str
    description: str
    # Resource names of documents currently linked to the test.
    _document_names: List[str]
    create_time: Optional[datetime.datetime] = None
    update_time: Optional[datetime.datetime] = None
    _client: Optional[api.ApiClient] = None

    def __post_init__(self):
        # API stubs are only available when the test is bound to a client.
        if self._client:
            self._test_api = api.TestServiceApi(self._client)
            self._test_case_api = api.TestCaseServiceApi(self._client)
            self._document_api = api.DocumentServiceApi(self._client)

    @property
    def test_cases(self) -> List[TestCase]:
        """Retrieves all test cases in the test."""
        res = self._test_case_api.test_case_service_list_test_cases(self.key)
        if res and res.test_cases:
            return [TestCase._from_api_test_case(tc) for tc in res.test_cases]

        return []

    @property
    def documents(self) -> List[d7s.Document]:
        """Retrieves all documents attached to the test."""
        if not self._document_names:
            return []

        res = self._document_api.document_service_batch_get_documents(
            self._document_names
        )
        if res and res.documents:
            return [
                d7s.Document._from_api_document(d, self._client) for d in res.documents
            ]

        return []

    def perturb(
        self,
        new_test_name: str,
        perturbators: Union[p10s.Perturbator, str, List[Union[p10s.Perturbator, str]]],
        new_test_description: str = "",
    ) -> "Test":
        """Creates new Test by perturbing this test using the given Perturbators.

        Args:
            new_test_name (str): Name of the newly created test.
            perturbators (Perturbator, List[Perturbator], str or List[str]): List of
                Perturbators or their keys used to perturbate this Test.
            new_test_description (str): Optional description of the newly created test.

        Raises:
            RuntimeError: If the test is not bound to a client, or the server
                does not return the perturbed test.
            ValueError: If the new test name or the perturbators are missing.
        """

        if self._client is None:
            raise RuntimeError("Client is not set.")

        if not new_test_name:
            raise ValueError("New test name must be provided.")

        if not perturbators:
            raise ValueError("Perturbators must be provided.")

        # Accept a single perturbator (or key) as well as a list of them.
        if isinstance(perturbators, (p10s.Perturbator, str)):
            perturbators_to_run = [perturbators]
        else:
            perturbators_to_run = perturbators

        configs = [_PerturbatorConfiguration(p) for p in perturbators_to_run]

        req = models.TestServicePerturbTestRequest(
            perturbatorConfigurations=[c.to_api_proto() for c in configs],
            newTestDisplayName=new_test_name,
            newTestDescription=new_test_description,
        )
        resp = self._test_api.test_service_perturb_test(self.key, req)
        # Guard against an empty response instead of failing later with an
        # opaque AttributeError inside _from_api_test (consistent with the
        # `if res and res.X` checks used elsewhere in this class).
        if not resp or not resp.test:
            raise RuntimeError("Failed to perturb the test.")

        return Test._from_api_test(resp.test, self._client)

    def delete(self, force=False):
        """Deletes the test.

        Args:
            force (bool): If True, test cases will be deleted as well.
        """
        self._test_api.test_service_delete_test(self.key, force=force)

    def create_test_case(
        self,
        prompt: str,
        answer: str,
        constraints: Optional[List[str]] = None,
        condition: str = "",
    ) -> Optional[TestCase]:
        """Creates a new test case in the test.

        Args:
            prompt (str): Prompt of the test case.
            answer (str): Expected answer of the test case.
            constraints (List[str]): String tokens expected in the actual answer.
                Note: all of the constraints in the list are concatenated using AND
                operator, which means actual answer need to contain all of the tokens.
            condition (str): Test case output condition, in a form logical expression.
                The format of the string is defined by the Google's filtering language.
                (ref. https://google.aip.dev/160#logical-operators)

        Returns:
            The created TestCase, or None when the server returns no test case.
        """
        case = TestCase(
            key="",
            prompt=prompt,
            answer=answer,
            constraints=constraints or [],
            condition=condition,
        )
        res = self._test_case_api.test_case_service_create_test_case(
            parent=self.key, test_case=case.to_api_proto()
        )
        if res and res.test_case:
            return TestCase._from_api_test_case(res.test_case)

        return None

    def remove_test_case(self, test_case_key: str):
        """Removes a test case from the test.

        Args:
            test_case_key (str): Resource name of the test case to be removed.
        """
        self._test_case_api.test_case_service_delete_test_case(test_case_key)

    def create_document(
        self, name: str, url: str, description: Optional[str] = None
    ) -> Optional[d7s.Document]:
        """Creates a new document and attaches it to the test.

        Args:
            name (str): Name of the document.
            url (str): URL of the document.
            description (str): Description of the document.

        Raises:
            RuntimeError: If the document could not be created or linked;
                on a link failure the newly created document is deleted again.
        """
        doc = d7s.Document("", name, description or "", url)
        res = self._document_api.document_service_create_document(doc.to_api_proto())
        if res and res.document:
            doc = d7s.Document._from_api_document(res.document, self._client)

        try:
            self.link_document(doc)
        except ValueError as err:
            # link_document rejects documents without a resource name, which
            # means the create call above did not return one.
            raise RuntimeError("Failed to create the document.") from err
        except Exception as err:
            # Roll back the orphaned document before surfacing the failure.
            doc.delete()
            raise RuntimeError("Failed to link the document to the test.") from err

        return doc

    def link_document(self, document: d7s.Document):
        """Attaches an existing document to the test.

        Args:
            document (Document): Document to be attached to the test.

        Raises:
            ValueError: If the document has no resource name.
            RuntimeError: If the server update fails (local state is rolled back).
        """
        if not document.key:
            raise ValueError("Document must have a resource name.")

        self._document_names.append(document.key)
        try:
            self._test_api.test_service_update_test(
                test_name=self.key,
                test=models.RequiredTheTestToUpdate(documents=self._document_names),
            )
        except Exception as err:
            # Keep the local document list in sync with the server state.
            self._document_names.remove(document.key)
            raise RuntimeError("Failed to link the document to the test.") from err

    def unlink_document(self, document_key: str):
        """Deletes a document attached to the test.

        Args:
            document_key (str): Resource name of the document to be detached from the test.

        Raises:
            ValueError: If the document is not attached to the test.
            RuntimeError: If the server update fails (local state is rolled back).
        """
        try:
            self._document_names.remove(document_key)
        except ValueError as err:
            raise ValueError(
                f"Document {document_key} is not attached to the test."
            ) from err

        try:
            self._test_api.test_service_update_test(
                test_name=self.key,
                test=models.RequiredTheTestToUpdate(documents=self._document_names),
            )
        except Exception as err:
            # Keep the local document list in sync with the server state.
            self._document_names.append(document_key)
            raise RuntimeError("Failed to unlink the document from the test.") from err

    @staticmethod
    def _from_api_test(api_test: models.V1alphaTest, client: api.ApiClient) -> "Test":
        """Builds a client Test from its API representation."""
        return Test(
            key=api_test.name or "",
            name=api_test.display_name or "",
            description=api_test.description or "",
            create_time=api_test.create_time,
            update_time=api_test.update_time,
            _document_names=api_test.documents or [],
            _client=client,
        )
276
+
277
+
278
class _Tests:
    """Entry point for listing, creating, deleting and importing tests."""

    def __init__(self, client: api.ApiClient):
        self._client = client
        self._api = api.TestServiceApi(client)

    def list(self) -> List[Test]:
        """Retrieves all user tests in the Eval Studio."""
        res = self._api.test_service_list_tests()
        if not res or not res.tests:
            return []

        return [Test._from_api_test(t, self._client) for t in res.tests]

    def create(
        self,
        name: str,
        description: Optional[str] = "",
        documents: Optional[List[d7s.Document]] = None,
    ) -> Optional[Test]:
        """Creates a new test in the Eval Studio.

        Args:
            name (str): Name of the test.
            description (str): Description of the test.
            documents (optional): List of `Document`s to be attached to the test.
        """
        document_keys = [d.key for d in documents] if documents else None
        api_test = models.V1alphaTest(
            display_name=name, description=description, documents=document_keys
        )
        res = self._api.test_service_create_test(api_test)
        if not res or not res.test:
            return None

        return Test._from_api_test(res.test, self._client)

    def delete(self, key: str):
        """Deletes the test with given resource name.

        Args:
            key (str): Resource name of the test to be deleted.
        """
        self._api.test_service_delete_test(key)

    def import_test_suite(
        self, test_suite: str, name_prefix: Optional[str] = None
    ) -> List[Test]:
        """Imports a list of tests (Test Suite) from a JSON.

        Args:
            test_suite (str): JSON string of the test suite.
            name_prefix (str): Optional prefix to name the imported tests.
        """
        req = models.V1alphaBatchImportTestsRequest(
            testsJson=test_suite, testDisplayNamePrefix=name_prefix or None
        )
        res = self._api.test_service_batch_import_tests(req)
        if not res or not res.tests:
            return []

        return [Test._from_api_test(t, self._client) for t in res.tests]
339
+
340
+
341
class _PerturbatorConfiguration:
    """Represents the configuration of a perturbator to use during the perturbation process.

    Attributes:
        perturbator (Perturbator or str): Perturbator to use or its key.
    """

    def __init__(self, perturbator: Union[p10s.Perturbator, str]):
        if isinstance(perturbator, p10s.Perturbator):
            # A full Perturbator carries its own intensity and parameters.
            self.name = perturbator.key
            self.intensity = perturbator.intensity
            self.params = perturbator.params
        else:
            # A bare key defaults to medium intensity with no parameters.
            self.name = perturbator
            self.intensity = p10s.PerturbatorIntensity.medium
            self.params = None

    def to_api_proto(self) -> models.V1alphaPerturbatorConfiguration:
        """Converts the client PerturbatorConfiguration to an API PerturbatorConfiguration."""
        encoded_params = json.dumps(self.params) if self.params else None
        return models.V1alphaPerturbatorConfiguration(
            name=self.name,
            intensity=self.intensity.to_api_proto(),
            params=encoded_params,
        )
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.3
2
+ Name: eval-studio-client
3
+ Version: 0.7.0
4
+ Project-URL: Source, https://github.com/h2oai/eval-studio/tree/main/client-py/src/
5
+ Project-URL: Issues, https://github.com/h2oai/eval-studio/issues
6
+ Author-email: "H2O.ai" <support@h2o.ai>
7
+ License-Expression: MIT
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Programming Language :: Python
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Requires-Python: >=3.9
14
+ Requires-Dist: h2o-authn<3.0.0,>=2.0.0
15
+ Requires-Dist: pydantic>=2
16
+ Requires-Dist: python-dateutil>=2.5.3
17
+ Requires-Dist: typing-extensions>=4.7.1
18
+ Requires-Dist: urllib3<2.3.0,>=1.26.19