scorable 1.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scorable might be problematic. Click here for more details.

Files changed (228) hide show
  1. root/__about__.py +4 -0
  2. root/__init__.py +17 -0
  3. root/client.py +207 -0
  4. root/datasets.py +231 -0
  5. root/execution_logs.py +162 -0
  6. root/generated/__init__.py +0 -0
  7. root/generated/openapi_aclient/__init__.py +1 -0
  8. root/generated/openapi_aclient/api/__init__.py +1 -0
  9. root/generated/openapi_aclient/api/datasets_api.py +1274 -0
  10. root/generated/openapi_aclient/api/evaluators_api.py +3641 -0
  11. root/generated/openapi_aclient/api/execution_logs_api.py +751 -0
  12. root/generated/openapi_aclient/api/judges_api.py +3794 -0
  13. root/generated/openapi_aclient/api/models_api.py +1473 -0
  14. root/generated/openapi_aclient/api/objectives_api.py +1767 -0
  15. root/generated/openapi_aclient/api_client.py +662 -0
  16. root/generated/openapi_aclient/api_response.py +22 -0
  17. root/generated/openapi_aclient/configuration.py +470 -0
  18. root/generated/openapi_aclient/exceptions.py +197 -0
  19. root/generated/openapi_aclient/models/__init__.py +122 -0
  20. root/generated/openapi_aclient/models/data_set_create.py +118 -0
  21. root/generated/openapi_aclient/models/data_set_create_request.py +105 -0
  22. root/generated/openapi_aclient/models/data_set_list.py +129 -0
  23. root/generated/openapi_aclient/models/data_set_type.py +36 -0
  24. root/generated/openapi_aclient/models/dataset_range_request.py +93 -0
  25. root/generated/openapi_aclient/models/evaluator.py +273 -0
  26. root/generated/openapi_aclient/models/evaluator_calibration_output.py +101 -0
  27. root/generated/openapi_aclient/models/evaluator_calibration_result.py +134 -0
  28. root/generated/openapi_aclient/models/evaluator_demonstrations.py +107 -0
  29. root/generated/openapi_aclient/models/evaluator_demonstrations_request.py +107 -0
  30. root/generated/openapi_aclient/models/evaluator_execution_function_parameter_property_request.py +86 -0
  31. root/generated/openapi_aclient/models/evaluator_execution_function_parameter_request.py +109 -0
  32. root/generated/openapi_aclient/models/evaluator_execution_function_parameter_type_enum.py +35 -0
  33. root/generated/openapi_aclient/models/evaluator_execution_function_request.py +99 -0
  34. root/generated/openapi_aclient/models/evaluator_execution_functions_request.py +98 -0
  35. root/generated/openapi_aclient/models/evaluator_execution_functions_type_enum.py +35 -0
  36. root/generated/openapi_aclient/models/evaluator_execution_request.py +134 -0
  37. root/generated/openapi_aclient/models/evaluator_execution_result.py +114 -0
  38. root/generated/openapi_aclient/models/evaluator_inputs_value.py +100 -0
  39. root/generated/openapi_aclient/models/evaluator_inputs_value_items.py +89 -0
  40. root/generated/openapi_aclient/models/evaluator_list_output.py +198 -0
  41. root/generated/openapi_aclient/models/evaluator_reference.py +90 -0
  42. root/generated/openapi_aclient/models/evaluator_reference_request.py +90 -0
  43. root/generated/openapi_aclient/models/evaluator_request.py +194 -0
  44. root/generated/openapi_aclient/models/evaluator_result.py +110 -0
  45. root/generated/openapi_aclient/models/execution_log_details.py +291 -0
  46. root/generated/openapi_aclient/models/execution_log_details_evaluation_context.py +83 -0
  47. root/generated/openapi_aclient/models/execution_log_details_evaluator_latencies_inner.py +83 -0
  48. root/generated/openapi_aclient/models/execution_log_list.py +217 -0
  49. root/generated/openapi_aclient/models/execution_log_list_evaluation_context.py +83 -0
  50. root/generated/openapi_aclient/models/generation_model_params_request.py +93 -0
  51. root/generated/openapi_aclient/models/id.py +87 -0
  52. root/generated/openapi_aclient/models/input_variable.py +121 -0
  53. root/generated/openapi_aclient/models/input_variable_request.py +82 -0
  54. root/generated/openapi_aclient/models/judge.py +178 -0
  55. root/generated/openapi_aclient/models/judge_execution_request.py +114 -0
  56. root/generated/openapi_aclient/models/judge_execution_response.py +97 -0
  57. root/generated/openapi_aclient/models/judge_files_inner.py +84 -0
  58. root/generated/openapi_aclient/models/judge_generator_request.py +142 -0
  59. root/generated/openapi_aclient/models/judge_generator_response.py +88 -0
  60. root/generated/openapi_aclient/models/judge_invite_request.py +87 -0
  61. root/generated/openapi_aclient/models/judge_list.py +156 -0
  62. root/generated/openapi_aclient/models/judge_rectifier_request_request.py +114 -0
  63. root/generated/openapi_aclient/models/judge_rectifier_response.py +121 -0
  64. root/generated/openapi_aclient/models/judge_request.py +108 -0
  65. root/generated/openapi_aclient/models/model.py +126 -0
  66. root/generated/openapi_aclient/models/model_list.py +115 -0
  67. root/generated/openapi_aclient/models/model_params.py +89 -0
  68. root/generated/openapi_aclient/models/model_params_request.py +89 -0
  69. root/generated/openapi_aclient/models/model_request.py +118 -0
  70. root/generated/openapi_aclient/models/nested_evaluator.py +110 -0
  71. root/generated/openapi_aclient/models/nested_evaluator_objective.py +87 -0
  72. root/generated/openapi_aclient/models/nested_evaluator_request.py +92 -0
  73. root/generated/openapi_aclient/models/nested_objective_evaluator.py +105 -0
  74. root/generated/openapi_aclient/models/nested_objective_evaluator_request.py +92 -0
  75. root/generated/openapi_aclient/models/nested_objective_list.py +111 -0
  76. root/generated/openapi_aclient/models/nested_user_details.py +88 -0
  77. root/generated/openapi_aclient/models/nested_user_details_request.py +82 -0
  78. root/generated/openapi_aclient/models/nested_vector_objective.py +88 -0
  79. root/generated/openapi_aclient/models/nested_vector_objective_request.py +82 -0
  80. root/generated/openapi_aclient/models/objective.py +157 -0
  81. root/generated/openapi_aclient/models/objective_list.py +128 -0
  82. root/generated/openapi_aclient/models/objective_request.py +113 -0
  83. root/generated/openapi_aclient/models/objective_validator.py +100 -0
  84. root/generated/openapi_aclient/models/objective_validator_request.py +90 -0
  85. root/generated/openapi_aclient/models/paginated_data_set_list_list.py +111 -0
  86. root/generated/openapi_aclient/models/paginated_evaluator_list.py +111 -0
  87. root/generated/openapi_aclient/models/paginated_evaluator_list_output_list.py +111 -0
  88. root/generated/openapi_aclient/models/paginated_execution_log_list_list.py +111 -0
  89. root/generated/openapi_aclient/models/paginated_judge_list_list.py +111 -0
  90. root/generated/openapi_aclient/models/paginated_model_list_list.py +111 -0
  91. root/generated/openapi_aclient/models/paginated_objective_list.py +111 -0
  92. root/generated/openapi_aclient/models/paginated_objective_list_list.py +111 -0
  93. root/generated/openapi_aclient/models/patched_evaluator_request.py +194 -0
  94. root/generated/openapi_aclient/models/patched_judge_request.py +110 -0
  95. root/generated/openapi_aclient/models/patched_model_request.py +118 -0
  96. root/generated/openapi_aclient/models/patched_objective_request.py +113 -0
  97. root/generated/openapi_aclient/models/provider.py +99 -0
  98. root/generated/openapi_aclient/models/reference_variable.py +123 -0
  99. root/generated/openapi_aclient/models/reference_variable_request.py +83 -0
  100. root/generated/openapi_aclient/models/skill_execution_validator_result.py +130 -0
  101. root/generated/openapi_aclient/models/skill_test_data_request.py +107 -0
  102. root/generated/openapi_aclient/models/skill_test_data_request_dataset_range.py +93 -0
  103. root/generated/openapi_aclient/models/skill_test_input_request.py +171 -0
  104. root/generated/openapi_aclient/models/skill_type_enum.py +36 -0
  105. root/generated/openapi_aclient/models/status_change.py +84 -0
  106. root/generated/openapi_aclient/models/status_change_request.py +84 -0
  107. root/generated/openapi_aclient/models/status_change_status_enum.py +36 -0
  108. root/generated/openapi_aclient/models/status_enum.py +38 -0
  109. root/generated/openapi_aclient/models/validation_result_status.py +36 -0
  110. root/generated/openapi_aclient/models/visibility_enum.py +38 -0
  111. root/generated/openapi_aclient/rest.py +166 -0
  112. root/generated/openapi_aclient_README.md +239 -0
  113. root/generated/openapi_client/__init__.py +1 -0
  114. root/generated/openapi_client/api/__init__.py +1 -0
  115. root/generated/openapi_client/api/datasets_api.py +1274 -0
  116. root/generated/openapi_client/api/evaluators_api.py +3641 -0
  117. root/generated/openapi_client/api/execution_logs_api.py +751 -0
  118. root/generated/openapi_client/api/judges_api.py +3794 -0
  119. root/generated/openapi_client/api/models_api.py +1473 -0
  120. root/generated/openapi_client/api/objectives_api.py +1767 -0
  121. root/generated/openapi_client/api_client.py +659 -0
  122. root/generated/openapi_client/api_response.py +22 -0
  123. root/generated/openapi_client/configuration.py +474 -0
  124. root/generated/openapi_client/exceptions.py +197 -0
  125. root/generated/openapi_client/models/__init__.py +120 -0
  126. root/generated/openapi_client/models/data_set_create.py +118 -0
  127. root/generated/openapi_client/models/data_set_create_request.py +105 -0
  128. root/generated/openapi_client/models/data_set_list.py +129 -0
  129. root/generated/openapi_client/models/data_set_type.py +36 -0
  130. root/generated/openapi_client/models/dataset_range_request.py +93 -0
  131. root/generated/openapi_client/models/evaluator.py +273 -0
  132. root/generated/openapi_client/models/evaluator_calibration_output.py +101 -0
  133. root/generated/openapi_client/models/evaluator_calibration_result.py +134 -0
  134. root/generated/openapi_client/models/evaluator_demonstrations.py +107 -0
  135. root/generated/openapi_client/models/evaluator_demonstrations_request.py +107 -0
  136. root/generated/openapi_client/models/evaluator_execution_function_parameter_property_request.py +86 -0
  137. root/generated/openapi_client/models/evaluator_execution_function_parameter_request.py +109 -0
  138. root/generated/openapi_client/models/evaluator_execution_function_parameter_type_enum.py +35 -0
  139. root/generated/openapi_client/models/evaluator_execution_function_request.py +99 -0
  140. root/generated/openapi_client/models/evaluator_execution_functions_request.py +98 -0
  141. root/generated/openapi_client/models/evaluator_execution_functions_type_enum.py +35 -0
  142. root/generated/openapi_client/models/evaluator_execution_request.py +134 -0
  143. root/generated/openapi_client/models/evaluator_execution_result.py +114 -0
  144. root/generated/openapi_client/models/evaluator_inputs_value.py +100 -0
  145. root/generated/openapi_client/models/evaluator_inputs_value_items.py +89 -0
  146. root/generated/openapi_client/models/evaluator_list_output.py +198 -0
  147. root/generated/openapi_client/models/evaluator_reference.py +90 -0
  148. root/generated/openapi_client/models/evaluator_reference_request.py +90 -0
  149. root/generated/openapi_client/models/evaluator_request.py +194 -0
  150. root/generated/openapi_client/models/evaluator_result.py +110 -0
  151. root/generated/openapi_client/models/execution_log_details.py +291 -0
  152. root/generated/openapi_client/models/execution_log_details_evaluation_context.py +83 -0
  153. root/generated/openapi_client/models/execution_log_details_evaluator_latencies_inner.py +83 -0
  154. root/generated/openapi_client/models/execution_log_list.py +215 -0
  155. root/generated/openapi_client/models/execution_log_list_evaluation_context.py +83 -0
  156. root/generated/openapi_client/models/generation_model_params_request.py +93 -0
  157. root/generated/openapi_client/models/id.py +87 -0
  158. root/generated/openapi_client/models/input_variable.py +121 -0
  159. root/generated/openapi_client/models/input_variable_request.py +82 -0
  160. root/generated/openapi_client/models/judge.py +178 -0
  161. root/generated/openapi_client/models/judge_execution_request.py +114 -0
  162. root/generated/openapi_client/models/judge_execution_response.py +97 -0
  163. root/generated/openapi_client/models/judge_files_inner.py +84 -0
  164. root/generated/openapi_client/models/judge_generator_request.py +142 -0
  165. root/generated/openapi_client/models/judge_generator_response.py +88 -0
  166. root/generated/openapi_client/models/judge_invite_request.py +87 -0
  167. root/generated/openapi_client/models/judge_list.py +156 -0
  168. root/generated/openapi_client/models/judge_rectifier_request_request.py +114 -0
  169. root/generated/openapi_client/models/judge_rectifier_response.py +121 -0
  170. root/generated/openapi_client/models/judge_request.py +108 -0
  171. root/generated/openapi_client/models/model.py +126 -0
  172. root/generated/openapi_client/models/model_list.py +115 -0
  173. root/generated/openapi_client/models/model_params.py +89 -0
  174. root/generated/openapi_client/models/model_params_request.py +89 -0
  175. root/generated/openapi_client/models/model_request.py +118 -0
  176. root/generated/openapi_client/models/nested_evaluator.py +110 -0
  177. root/generated/openapi_client/models/nested_evaluator_objective.py +87 -0
  178. root/generated/openapi_client/models/nested_evaluator_request.py +92 -0
  179. root/generated/openapi_client/models/nested_objective_evaluator.py +105 -0
  180. root/generated/openapi_client/models/nested_objective_evaluator_request.py +92 -0
  181. root/generated/openapi_client/models/nested_objective_list.py +111 -0
  182. root/generated/openapi_client/models/nested_user_details.py +88 -0
  183. root/generated/openapi_client/models/nested_user_details_request.py +82 -0
  184. root/generated/openapi_client/models/nested_vector_objective.py +88 -0
  185. root/generated/openapi_client/models/nested_vector_objective_request.py +82 -0
  186. root/generated/openapi_client/models/objective.py +157 -0
  187. root/generated/openapi_client/models/objective_list.py +128 -0
  188. root/generated/openapi_client/models/objective_request.py +113 -0
  189. root/generated/openapi_client/models/objective_validator.py +100 -0
  190. root/generated/openapi_client/models/objective_validator_request.py +90 -0
  191. root/generated/openapi_client/models/paginated_data_set_list_list.py +111 -0
  192. root/generated/openapi_client/models/paginated_evaluator_list.py +111 -0
  193. root/generated/openapi_client/models/paginated_evaluator_list_output_list.py +111 -0
  194. root/generated/openapi_client/models/paginated_execution_log_list_list.py +111 -0
  195. root/generated/openapi_client/models/paginated_judge_list_list.py +111 -0
  196. root/generated/openapi_client/models/paginated_model_list_list.py +111 -0
  197. root/generated/openapi_client/models/paginated_objective_list.py +111 -0
  198. root/generated/openapi_client/models/paginated_objective_list_list.py +111 -0
  199. root/generated/openapi_client/models/patched_evaluator_request.py +194 -0
  200. root/generated/openapi_client/models/patched_judge_request.py +110 -0
  201. root/generated/openapi_client/models/patched_model_request.py +118 -0
  202. root/generated/openapi_client/models/patched_objective_request.py +113 -0
  203. root/generated/openapi_client/models/provider.py +99 -0
  204. root/generated/openapi_client/models/reference_variable.py +123 -0
  205. root/generated/openapi_client/models/reference_variable_request.py +83 -0
  206. root/generated/openapi_client/models/skill_execution_validator_result.py +130 -0
  207. root/generated/openapi_client/models/skill_test_data_request.py +107 -0
  208. root/generated/openapi_client/models/skill_test_data_request_dataset_range.py +93 -0
  209. root/generated/openapi_client/models/skill_test_input_request.py +171 -0
  210. root/generated/openapi_client/models/skill_type_enum.py +36 -0
  211. root/generated/openapi_client/models/status_change.py +84 -0
  212. root/generated/openapi_client/models/status_change_request.py +84 -0
  213. root/generated/openapi_client/models/status_change_status_enum.py +36 -0
  214. root/generated/openapi_client/models/status_enum.py +38 -0
  215. root/generated/openapi_client/models/validation_result_status.py +36 -0
  216. root/generated/openapi_client/models/visibility_enum.py +38 -0
  217. root/generated/openapi_client/rest.py +203 -0
  218. root/generated/openapi_client_README.md +238 -0
  219. root/judges.py +681 -0
  220. root/models.py +197 -0
  221. root/objectives.py +343 -0
  222. root/py.typed +0 -0
  223. root/skills.py +1707 -0
  224. root/utils.py +90 -0
  225. scorable-1.6.4.dist-info/METADATA +395 -0
  226. scorable-1.6.4.dist-info/RECORD +228 -0
  227. scorable-1.6.4.dist-info/WHEEL +4 -0
  228. scorable-1.6.4.dist-info/licenses/LICENSE +202 -0
root/judges.py ADDED
@@ -0,0 +1,681 @@
1
+ from __future__ import annotations
2
+
3
+ from contextlib import AbstractAsyncContextManager
4
+ from functools import partial
5
+ from typing import AsyncIterator, Dict, Iterator, List, Literal, Optional, Union, cast
6
+
7
+ from pydantic import StrictStr
8
+
9
+ from root.generated.openapi_aclient.models.judge_generator_request import (
10
+ JudgeGeneratorRequest as AJudgeGeneratorRequest,
11
+ )
12
+ from root.generated.openapi_aclient.models.judge_generator_response import (
13
+ JudgeGeneratorResponse as AJudgeGeneratorResponse,
14
+ )
15
+ from root.generated.openapi_aclient.models.judge_request import (
16
+ JudgeRequest as AJudgeRequest,
17
+ )
18
+ from root.generated.openapi_aclient.models.status_enum import (
19
+ StatusEnum as AStatusEnum,
20
+ )
21
+ from root.generated.openapi_aclient.models.visibility_enum import VisibilityEnum as AVisibilityEnum
22
+ from root.generated.openapi_client.models.judge_generator_request import JudgeGeneratorRequest
23
+ from root.generated.openapi_client.models.judge_generator_response import JudgeGeneratorResponse
24
+ from root.generated.openapi_client.models.judge_request import JudgeRequest
25
+ from root.generated.openapi_client.models.status_enum import StatusEnum
26
+ from root.generated.openapi_client.models.visibility_enum import VisibilityEnum
27
+
28
+ from .generated.openapi_aclient import ApiClient as AApiClient
29
+ from .generated.openapi_aclient.api.judges_api import JudgesApi as AJudgesApi
30
+ from .generated.openapi_aclient.models.evaluator_execution_functions_request import (
31
+ EvaluatorExecutionFunctionsRequest as AEvaluatorExecutionFunctionsRequest,
32
+ )
33
+ from .generated.openapi_aclient.models.evaluator_reference_request import (
34
+ EvaluatorReferenceRequest as AEvaluatorReferenceRequest,
35
+ )
36
+ from .generated.openapi_aclient.models.judge import Judge as AOpenApiJudge
37
+ from .generated.openapi_aclient.models.judge_execution_request import (
38
+ JudgeExecutionRequest as AJudgeExecutionRequest,
39
+ )
40
+ from .generated.openapi_aclient.models.judge_execution_response import (
41
+ JudgeExecutionResponse as AJudgeExecutionResponse,
42
+ )
43
+ from .generated.openapi_aclient.models.judge_list import JudgeList as AJudgeList
44
+ from .generated.openapi_aclient.models.paginated_judge_list_list import (
45
+ PaginatedJudgeListList as APaginatedJudgeListList,
46
+ )
47
+ from .generated.openapi_aclient.models.patched_judge_request import (
48
+ PatchedJudgeRequest as APatchedJudgeRequest,
49
+ )
50
+ from .generated.openapi_client import ApiClient
51
+ from .generated.openapi_client.api.judges_api import JudgesApi
52
+ from .generated.openapi_client.models.evaluator_execution_functions_request import (
53
+ EvaluatorExecutionFunctionsRequest,
54
+ )
55
+ from .generated.openapi_client.models.evaluator_reference_request import EvaluatorReferenceRequest
56
+ from .generated.openapi_client.models.judge import Judge as OpenApiJudge
57
+ from .generated.openapi_client.models.judge_execution_request import JudgeExecutionRequest
58
+ from .generated.openapi_client.models.judge_execution_response import JudgeExecutionResponse
59
+ from .generated.openapi_client.models.judge_list import JudgeList
60
+ from .generated.openapi_client.models.paginated_judge_list_list import PaginatedJudgeListList
61
+ from .generated.openapi_client.models.patched_judge_request import PatchedJudgeRequest
62
+ from .utils import ClientContextCallable, with_async_client, with_sync_client
63
+
64
+
65
class Judge(OpenApiJudge):
    """Synchronous handle for one judge.

    The data attributes come from the automatically generated superclass;
    consult its documentation for the full list.
    """

    # Assigned by ``_wrap``; not supplied by the generated model itself.
    client_context: ClientContextCallable

    @classmethod
    def _wrap(cls, apiobj: Union[OpenApiJudge, JudgeList], client_context: ClientContextCallable) -> Judge:
        """Convert a generated API object into a ``Judge`` in place.

        The object is not copied: its ``__class__`` is reassigned to ``cls``
        and the client context is attached to it.

        Args:
          apiobj: Generated judge (or judge list entry) object to wrap.
          client_context: Client context callable to attach to the result.

        Returns:
          The same object, now typed as ``cls``.

        Raises:
          ValueError: If ``apiobj`` is neither an ``OpenApiJudge`` nor a ``JudgeList``.
        """
        if isinstance(apiobj, (OpenApiJudge, JudgeList)):
            wrapped = cast(Judge, apiobj)
            wrapped.__class__ = cls
            wrapped.client_context = client_context
            return wrapped
        raise ValueError(f"Wrong instance in _wrap: {apiobj!r}")

    @with_sync_client
    def run(
        self,
        *,
        response: str,
        request: Optional[str] = None,
        contexts: Optional[List[str]] = None,
        functions: Optional[List[EvaluatorExecutionFunctionsRequest]] = None,
        expected_output: Optional[str] = None,
        tags: Optional[List[str]] = None,
        _request_timeout: Optional[int] = None,
        _client: ApiClient,
    ) -> JudgeExecutionResponse:
        """
        Execute this judge against an LLM response.

        Args:
          response: LLM output to evaluate
          request: The prompt that was sent to the LLM. Optional.
          contexts: Optional documents passed to RAG evaluators
          functions: Optional functions to execute
          expected_output: Optional expected output
          tags: Optional tags to attach to the judge execution
          _request_timeout: Optional timeout for the request

        Returns:
          The execution response returned by the judges API.
        """
        judges_api = JudgesApi(_client)
        payload = JudgeExecutionRequest(
            request=request,
            response=response,
            contexts=contexts,
            functions=functions,
            expected_output=expected_output,
            tags=tags,
        )
        outcome = judges_api.judges_execute_create(
            judge_id=self.id,
            judge_execution_request=payload,
            _request_timeout=_request_timeout,
        )
        return outcome
123
+
124
+
125
class AJudge(AOpenApiJudge):
    """
    Async wrapper for a single Judge.

    For available attributes, please check the (automatically
    generated) superclass documentation.
    """

    # Assigned by ``_awrap``; not supplied by the generated model itself.
    client_context: ClientContextCallable

    @classmethod
    async def _awrap(cls, apiobj: Union[AOpenApiJudge, AJudgeList], client_context: ClientContextCallable) -> AJudge:
        """Wrap an async API object into an AJudge instance, in place.

        The object is not copied: its ``__class__`` is reassigned to ``cls``
        and the client context is attached to it.

        Args:
          apiobj: Generated judge (or judge list entry) object to wrap.
          client_context: Client context callable to attach to the result.

        Returns:
          The same object, now typed as ``cls``.

        Raises:
          ValueError: If ``apiobj`` is neither an ``AOpenApiJudge`` nor an ``AJudgeList``.
        """
        if not isinstance(apiobj, (AOpenApiJudge, AJudgeList)):
            # Name the method that actually failed (the message previously said "_wrap").
            raise ValueError(f"Wrong instance in _awrap: {apiobj!r}")
        obj = cast(AJudge, apiobj)
        obj.__class__ = cls
        obj.client_context = client_context
        return obj

    @with_async_client
    async def arun(
        self,
        *,
        response: str,
        request: Optional[str] = None,
        contexts: Optional[List[str]] = None,
        functions: Optional[List[AEvaluatorExecutionFunctionsRequest]] = None,
        expected_output: Optional[str] = None,
        tags: Optional[List[str]] = None,
        _request_timeout: Optional[int] = None,
        _client: AApiClient,
    ) -> AJudgeExecutionResponse:
        """
        Asynchronously run the judge.

        Args:
          response: LLM output to evaluate
          request: The prompt sent to the LLM. Optional.
          contexts: Optional documents passed to RAG evaluators
          functions: Optional functions to execute
          expected_output: Optional expected output
          tags: Optional tags to add to the judge execution
          _request_timeout: Optional timeout for the request

        Returns:
          The execution response returned by the judges API.
        """
        api_instance = AJudgesApi(_client)
        execution_request = AJudgeExecutionRequest(
            contexts=contexts,
            functions=functions,
            expected_output=expected_output,
            request=request,
            response=response,
            tags=tags,
        )
        return await api_instance.judges_execute_create(
            judge_id=self.id,
            judge_execution_request=execution_request,
            _request_timeout=_request_timeout,
        )
183
+
184
+
185
+ class Judges:
186
+ """
187
+ Judges API
188
+
189
+ Note:
190
+ The construction of the API instance should be handled by
191
+ accessing an attribute of a :class:`root.client.RootSignals` instance.
192
+ """
193
+
194
def __init__(self, client_context: ClientContextCallable):
    """Remember the client context callable that the API methods rely on.

    Args:
      client_context: Callable stored on the instance and handed on to
        every wrapped judge this API object returns.
    """
    self.client_context = client_context
196
+
197
@with_sync_client
def generate(
    self,
    *,
    intent: str,
    visibility: Literal["public", "unlisted"] = "unlisted",
    stage: Optional[str] = None,
    extra_contexts: Optional[Dict[str, str | None]] = None,
    strict: bool = False,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> JudgeGeneratorResponse:
    """
    Ask the service to generate a judge from a described intent.

    Args:
      intent: Description of what the judge is being built for.
        Example: I am building a chatbot for ecommerce and I would like to measure the quality of the responses.
      visibility: "public" maps to global visibility; any other value maps to unlisted.
      stage: Disambiguates the intent by naming the stage the judge applies to.
        Example: For a chatbot judge, the stage could be "response generation".
      extra_contexts: Additional context entries forwarded to the generator.
        Example: {"domain": "Ecommerce selling clothing"}, {"audience": "Women aged 25-35"}
      strict: Whether generation should fail when the intent is ambiguous.
      _request_timeout: Optional timeout for the request

    Returns:
      Wrapper for the judge id and optionally an error code if the generation failed.
    """
    judges_api = JudgesApi(_client)
    if visibility == "public":
        resolved_visibility = VisibilityEnum.GLOBAL
    else:
        resolved_visibility = VisibilityEnum.UNLISTED
    payload = JudgeGeneratorRequest(
        intent=intent,
        stage=stage,
        extra_contexts=extra_contexts,
        strict=strict,
        visibility=resolved_visibility,
    )
    return judges_api.judges_generate_create(
        judge_generator_request=payload,
        _request_timeout=_request_timeout,
    )
237
+
238
@with_async_client
async def agenerate(
    self,
    *,
    intent: str,
    visibility: Literal["public", "unlisted"] = "unlisted",
    stage: Optional[str] = None,
    extra_contexts: Optional[Dict[str, str | None]] = None,
    strict: bool = False,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudgeGeneratorResponse:
    """
    Asynchronously ask the service to generate a judge from a described intent.

    Args:
      intent: Description of what the judge is being built for.
        Example: I am building a chatbot for ecommerce and I would like to measure the quality of the responses.
      visibility: "public" maps to global visibility; any other value maps to unlisted.
      stage: Disambiguates the intent by naming the stage the judge applies to.
        Example: For a chatbot judge, the stage could be "response generation".
      extra_contexts: Additional context entries forwarded to the generator.
        Example: {"domain": "Ecommerce selling clothing"}, {"audience": "Women aged 25-35"}
      strict: Whether generation should fail when the intent is ambiguous.
      _request_timeout: Optional timeout for the request

    Returns:
      Wrapper for the judge id and optionally an error code if the generation failed.
    """
    judges_api = AJudgesApi(_client)
    if visibility == "public":
        resolved_visibility = AVisibilityEnum.GLOBAL
    else:
        resolved_visibility = AVisibilityEnum.UNLISTED
    payload = AJudgeGeneratorRequest(
        intent=intent,
        stage=stage,
        extra_contexts=extra_contexts,
        strict=strict,
        visibility=resolved_visibility,
    )
    generated = await judges_api.judges_generate_create(
        judge_generator_request=payload,
        _request_timeout=_request_timeout,
    )
    return generated
278
+
279
@with_sync_client
def create(
    self,
    *,
    name: str,
    intent: str,
    evaluator_references: Optional[List[EvaluatorReferenceRequest]] = None,
    stage: Optional[str] = None,
    status: Literal["unlisted", "listed", "public", "public_unlisted"] = "unlisted",
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> Judge:
    """
    Create a judge from a name, an intent and a set of evaluator references.

    Args:
      name: Name for the judge
      intent: Intent for the judge
      evaluator_references: Evaluator references to include in the judge
      stage: Stage for the judge
      status: Status of the judge; converted to a ``StatusEnum`` before sending
      _request_timeout: Optional timeout for the request

    Returns:
      The created judge, wrapped so that it carries this instance's client context.
    """
    judges_api = JudgesApi(_client)
    payload = JudgeRequest(
        name=name,
        intent=intent,
        evaluator_references=evaluator_references,
        stage=stage,
        status=StatusEnum(status),
    )
    created = judges_api.judges_create(judge_request=payload, _request_timeout=_request_timeout)
    return Judge._wrap(created, client_context=self.client_context)
314
+
315
@with_async_client
async def acreate(
    self,
    *,
    name: str,
    intent: str,
    evaluator_references: Optional[List[AEvaluatorReferenceRequest]] = None,
    stage: Optional[str] = None,
    status: Literal["unlisted", "listed", "public", "public_unlisted"] = "unlisted",
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudge:
    """
    Asynchronously create a judge from a name, an intent and a set of evaluator references.

    Args:
      name: Name for the judge
      intent: Intent for the judge
      evaluator_references: Evaluator references to include in the judge
      stage: Stage for the judge
      status: Status of the judge; converted to a status enum before sending
      _request_timeout: Optional timeout for the request

    Returns:
      The created judge, wrapped so that it carries this instance's client context.
    """
    judges_api = AJudgesApi(_client)
    payload = AJudgeRequest(
        name=name,
        intent=intent,
        evaluator_references=evaluator_references,
        stage=stage,
        status=AStatusEnum(status),
    )
    created = await judges_api.judges_create(judge_request=payload, _request_timeout=_request_timeout)
    return await AJudge._awrap(created, client_context=self.client_context)
350
+
351
@with_sync_client
def get(self, judge_id: str, *, _request_timeout: Optional[int] = None, _client: ApiClient) -> Judge:
    """
    Fetch a single judge by its identifier.

    Args:
      judge_id: Identifier of the judge to retrieve.
      _request_timeout: Optional timeout for the request

    Returns:
      The retrieved judge, wrapped so that it carries this instance's client context.
    """
    judges_api = JudgesApi(_client)
    fetched = judges_api.judges_retrieve(id=judge_id, _request_timeout=_request_timeout)
    return Judge._wrap(fetched, client_context=self.client_context)
364
+
365
@with_async_client
async def aget(self, judge_id: str, *, _request_timeout: Optional[int] = None, _client: AApiClient) -> AJudge:
    """
    Asynchronously fetch a single judge by its identifier.

    Args:
      judge_id: Identifier of the judge to retrieve.
      _request_timeout: Optional timeout for the request

    Returns:
      The retrieved judge, wrapped so that it carries this instance's client context.
    """
    judges_api = AJudgesApi(_client)
    fetched = await judges_api.judges_retrieve(id=judge_id, _request_timeout=_request_timeout)
    return await AJudge._awrap(fetched, client_context=self.client_context)
378
+
379
@with_sync_client
def delete(self, judge_id: str, *, _request_timeout: Optional[int] = None, _client: ApiClient) -> None:
    """
    Remove a judge.

    Args:
      judge_id: Identifier of the judge to delete.
      _request_timeout: Optional timeout for the request

    Returns:
      Whatever the underlying ``judges_destroy`` call returns.
    """
    judges_api = JudgesApi(_client)
    outcome = judges_api.judges_destroy(id=judge_id, _request_timeout=_request_timeout)
    return outcome
389
+
390
@with_async_client
async def adelete(self, judge_id: str, *, _request_timeout: Optional[int] = None, _client: AApiClient) -> None:
    """
    Asynchronously remove a judge.

    Args:
      judge_id: ID of the judge to delete.
      _request_timeout: Optional timeout, passed through to the API call.

    Returns:
      Whatever the awaited ``judges_destroy`` call returns.
    """
    judges_api = AJudgesApi(_client)
    outcome = await judges_api.judges_destroy(id=judge_id, _request_timeout=_request_timeout)
    return outcome
400
+
401
@with_sync_client
def list(self, *, limit: int = 100, _client: ApiClient) -> Iterator[Judge]:
    """
    Lazily yield judges, fetching them page by page.

    Args:
      limit: Upper bound on the number of judges yielded. The number still
        outstanding is also sent as ``page_size`` on every request.

    Yields:
      Each judge wrapped together with this object's client context.
    """
    judges_api = JudgesApi(_client)
    remaining = limit
    next_cursor: Optional[StrictStr] = None

    while remaining > 0:
        page: PaginatedJudgeListList = judges_api.judges_list(page_size=remaining, cursor=next_cursor)
        if not page.results:
            # Empty (or missing) page: nothing more to hand out.
            return

        # Never hand out more than the caller asked for, even if the page is larger.
        batch = page.results[:remaining]
        remaining -= len(batch)
        for raw_judge in batch:
            yield Judge._wrap(raw_judge, client_context=self.client_context)

        next_cursor = page.next
        if not next_cursor:
            # No follow-up page advertised.
            return
423
+
424
async def alist(self, *, limit: int = 100) -> AsyncIterator[AJudge]:
    """
    Asynchronously yield judges, fetching them page by page.

    The client is obtained from ``self.client_context()`` and is asserted to be
    an asynchronous context manager before use.

    Args:
      limit: Upper bound on the number of judges yielded. The number still
        outstanding is also sent as ``page_size`` on every request.

    Yields:
      Each judge wrapped together with this object's client context.
    """
    context = self.client_context()
    assert isinstance(context, AbstractAsyncContextManager), "This method is not available in synchronous mode"
    async with context as client:
        fetch_page = AJudgesApi(client).judges_list

        remaining = limit
        next_cursor: Optional[StrictStr] = None
        while remaining > 0:
            page: APaginatedJudgeListList = await fetch_page(page_size=remaining, cursor=next_cursor)
            if not page.results:
                # Empty (or missing) page: nothing more to hand out.
                return

            # Never hand out more than the caller asked for, even if the page is larger.
            batch = page.results[:remaining]
            remaining -= len(batch)
            for raw_judge in batch:
                yield await AJudge._awrap(raw_judge, client_context=self.client_context)

            next_cursor = page.next
            if not next_cursor:
                # No follow-up page advertised.
                return
450
+
451
@with_sync_client
def update(
    self,
    judge_id: str,
    *,
    name: Optional[str] = None,
    evaluator_references: Optional[List[EvaluatorReferenceRequest]] = None,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> Judge:
    """
    Partially update an existing judge.

    Args:
      judge_id: ID of the judge to modify.
      name: Replacement name for the judge.
      evaluator_references: Replacement list of evaluator references.
      _request_timeout: Optional timeout, passed through to the API call.

    Returns:
      The judge returned by the partial-update call, wrapped together with
      this object's client context.
    """
    judges_api = JudgesApi(_client)
    patch = PatchedJudgeRequest(name=name, evaluator_references=evaluator_references)
    updated = judges_api.judges_partial_update(
        id=judge_id,
        patched_judge_request=patch,
        _request_timeout=_request_timeout,
    )
    return Judge._wrap(updated, client_context=self.client_context)
482
+
483
@with_async_client
async def aupdate(
    self,
    judge_id: str,
    *,
    name: Optional[str] = None,
    evaluator_references: Optional[List[AEvaluatorReferenceRequest]] = None,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudge:
    """
    Asynchronously apply a partial update to an existing judge.

    Args:
      judge_id: ID of the judge to modify.
      name: Replacement name for the judge.
      evaluator_references: Replacement list of evaluator references.
      _request_timeout: Optional timeout, passed through to the API call.

    Returns:
      The judge returned by the partial-update call, wrapped together with
      this object's client context.
    """
    judges_api = AJudgesApi(_client)
    patch = APatchedJudgeRequest(name=name, evaluator_references=evaluator_references)
    updated = await judges_api.judges_partial_update(
        id=judge_id,
        patched_judge_request=patch,
        _request_timeout=_request_timeout,
    )
    return await AJudge._awrap(updated, client_context=self.client_context)
514
+
515
@with_sync_client
def run(
    self,
    judge_id: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[EvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> JudgeExecutionResponse:
    """
    Execute a judge identified by its ID.

    Args:
      judge_id: ID of the judge to execute.
      response: LLM output to be evaluated.
      request: Prompt that was sent to the LLM, if available.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags attached to the judge execution.
      _request_timeout: Optional timeout, passed through to the API call.

    Returns:
      The execution response produced by the API call, returned unwrapped.
    """
    payload = JudgeExecutionRequest(
        response=response,
        request=request,
        expected_output=expected_output,
        contexts=contexts,
        functions=functions,
        tags=tags,
    )
    judges_api = JudgesApi(_client)
    execution = judges_api.judges_execute_create(
        judge_id=judge_id,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return execution
556
+
557
@with_async_client
async def arun(
    self,
    judge_id: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[AEvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudgeExecutionResponse:
    """
    Asynchronously execute a judge identified by its ID.

    Args:
      judge_id: ID of the judge to execute.
      response: LLM output to be evaluated.
      request: Prompt that was sent to the LLM, if available.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags attached to the judge execution.
      _request_timeout: Optional timeout, passed through to the API call.

    Returns:
      The execution response produced by the awaited API call, returned unwrapped.
    """
    payload = AJudgeExecutionRequest(
        response=response,
        request=request,
        expected_output=expected_output,
        contexts=contexts,
        functions=functions,
        tags=tags,
    )
    judges_api = AJudgesApi(_client)
    execution = await judges_api.judges_execute_create(
        judge_id=judge_id,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return execution
598
+
599
@with_sync_client
def run_by_name(
    self,
    name: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[EvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> JudgeExecutionResponse:
    """
    Execute a judge identified by its name.

    Args:
      name: Name of the judge to execute.
      response: LLM output to be evaluated.
      request: Prompt that was sent to the LLM, if available.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags attached to the judge execution.
      _request_timeout: Optional timeout, passed through to the API call.

    Returns:
      The execution response produced by the API call, returned unwrapped.
    """
    payload = JudgeExecutionRequest(
        response=response,
        request=request,
        expected_output=expected_output,
        contexts=contexts,
        functions=functions,
        tags=tags,
    )
    judges_api = JudgesApi(_client)
    execution = judges_api.judges_execute_by_name_create(
        name=name,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return execution
640
+
641
@with_async_client
async def arun_by_name(
    self,
    name: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[AEvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudgeExecutionResponse:
    """
    Asynchronously execute a judge identified by its name.

    Args:
      name: Name of the judge to execute.
      response: LLM output to be evaluated.
      request: Prompt that was sent to the LLM, if available.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags attached to the judge execution.
      _request_timeout: Optional timeout, passed through to the API call.

    Returns:
      The execution response produced by the awaited API call, returned unwrapped.
    """
    payload = AJudgeExecutionRequest(
        response=response,
        request=request,
        expected_output=expected_output,
        contexts=contexts,
        functions=functions,
        tags=tags,
    )
    judges_api = AJudgesApi(_client)
    execution = await judges_api.judges_execute_by_name_create(
        name=name,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return execution