scorable 1.6.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scorable might be problematic. Click here for more details.
- root/__about__.py +4 -0
- root/__init__.py +17 -0
- root/client.py +207 -0
- root/datasets.py +231 -0
- root/execution_logs.py +162 -0
- root/generated/__init__.py +0 -0
- root/generated/openapi_aclient/__init__.py +1 -0
- root/generated/openapi_aclient/api/__init__.py +1 -0
- root/generated/openapi_aclient/api/datasets_api.py +1274 -0
- root/generated/openapi_aclient/api/evaluators_api.py +3641 -0
- root/generated/openapi_aclient/api/execution_logs_api.py +751 -0
- root/generated/openapi_aclient/api/judges_api.py +3794 -0
- root/generated/openapi_aclient/api/models_api.py +1473 -0
- root/generated/openapi_aclient/api/objectives_api.py +1767 -0
- root/generated/openapi_aclient/api_client.py +662 -0
- root/generated/openapi_aclient/api_response.py +22 -0
- root/generated/openapi_aclient/configuration.py +470 -0
- root/generated/openapi_aclient/exceptions.py +197 -0
- root/generated/openapi_aclient/models/__init__.py +122 -0
- root/generated/openapi_aclient/models/data_set_create.py +118 -0
- root/generated/openapi_aclient/models/data_set_create_request.py +105 -0
- root/generated/openapi_aclient/models/data_set_list.py +129 -0
- root/generated/openapi_aclient/models/data_set_type.py +36 -0
- root/generated/openapi_aclient/models/dataset_range_request.py +93 -0
- root/generated/openapi_aclient/models/evaluator.py +273 -0
- root/generated/openapi_aclient/models/evaluator_calibration_output.py +101 -0
- root/generated/openapi_aclient/models/evaluator_calibration_result.py +134 -0
- root/generated/openapi_aclient/models/evaluator_demonstrations.py +107 -0
- root/generated/openapi_aclient/models/evaluator_demonstrations_request.py +107 -0
- root/generated/openapi_aclient/models/evaluator_execution_function_parameter_property_request.py +86 -0
- root/generated/openapi_aclient/models/evaluator_execution_function_parameter_request.py +109 -0
- root/generated/openapi_aclient/models/evaluator_execution_function_parameter_type_enum.py +35 -0
- root/generated/openapi_aclient/models/evaluator_execution_function_request.py +99 -0
- root/generated/openapi_aclient/models/evaluator_execution_functions_request.py +98 -0
- root/generated/openapi_aclient/models/evaluator_execution_functions_type_enum.py +35 -0
- root/generated/openapi_aclient/models/evaluator_execution_request.py +134 -0
- root/generated/openapi_aclient/models/evaluator_execution_result.py +114 -0
- root/generated/openapi_aclient/models/evaluator_inputs_value.py +100 -0
- root/generated/openapi_aclient/models/evaluator_inputs_value_items.py +89 -0
- root/generated/openapi_aclient/models/evaluator_list_output.py +198 -0
- root/generated/openapi_aclient/models/evaluator_reference.py +90 -0
- root/generated/openapi_aclient/models/evaluator_reference_request.py +90 -0
- root/generated/openapi_aclient/models/evaluator_request.py +194 -0
- root/generated/openapi_aclient/models/evaluator_result.py +110 -0
- root/generated/openapi_aclient/models/execution_log_details.py +291 -0
- root/generated/openapi_aclient/models/execution_log_details_evaluation_context.py +83 -0
- root/generated/openapi_aclient/models/execution_log_details_evaluator_latencies_inner.py +83 -0
- root/generated/openapi_aclient/models/execution_log_list.py +217 -0
- root/generated/openapi_aclient/models/execution_log_list_evaluation_context.py +83 -0
- root/generated/openapi_aclient/models/generation_model_params_request.py +93 -0
- root/generated/openapi_aclient/models/id.py +87 -0
- root/generated/openapi_aclient/models/input_variable.py +121 -0
- root/generated/openapi_aclient/models/input_variable_request.py +82 -0
- root/generated/openapi_aclient/models/judge.py +178 -0
- root/generated/openapi_aclient/models/judge_execution_request.py +114 -0
- root/generated/openapi_aclient/models/judge_execution_response.py +97 -0
- root/generated/openapi_aclient/models/judge_files_inner.py +84 -0
- root/generated/openapi_aclient/models/judge_generator_request.py +142 -0
- root/generated/openapi_aclient/models/judge_generator_response.py +88 -0
- root/generated/openapi_aclient/models/judge_invite_request.py +87 -0
- root/generated/openapi_aclient/models/judge_list.py +156 -0
- root/generated/openapi_aclient/models/judge_rectifier_request_request.py +114 -0
- root/generated/openapi_aclient/models/judge_rectifier_response.py +121 -0
- root/generated/openapi_aclient/models/judge_request.py +108 -0
- root/generated/openapi_aclient/models/model.py +126 -0
- root/generated/openapi_aclient/models/model_list.py +115 -0
- root/generated/openapi_aclient/models/model_params.py +89 -0
- root/generated/openapi_aclient/models/model_params_request.py +89 -0
- root/generated/openapi_aclient/models/model_request.py +118 -0
- root/generated/openapi_aclient/models/nested_evaluator.py +110 -0
- root/generated/openapi_aclient/models/nested_evaluator_objective.py +87 -0
- root/generated/openapi_aclient/models/nested_evaluator_request.py +92 -0
- root/generated/openapi_aclient/models/nested_objective_evaluator.py +105 -0
- root/generated/openapi_aclient/models/nested_objective_evaluator_request.py +92 -0
- root/generated/openapi_aclient/models/nested_objective_list.py +111 -0
- root/generated/openapi_aclient/models/nested_user_details.py +88 -0
- root/generated/openapi_aclient/models/nested_user_details_request.py +82 -0
- root/generated/openapi_aclient/models/nested_vector_objective.py +88 -0
- root/generated/openapi_aclient/models/nested_vector_objective_request.py +82 -0
- root/generated/openapi_aclient/models/objective.py +157 -0
- root/generated/openapi_aclient/models/objective_list.py +128 -0
- root/generated/openapi_aclient/models/objective_request.py +113 -0
- root/generated/openapi_aclient/models/objective_validator.py +100 -0
- root/generated/openapi_aclient/models/objective_validator_request.py +90 -0
- root/generated/openapi_aclient/models/paginated_data_set_list_list.py +111 -0
- root/generated/openapi_aclient/models/paginated_evaluator_list.py +111 -0
- root/generated/openapi_aclient/models/paginated_evaluator_list_output_list.py +111 -0
- root/generated/openapi_aclient/models/paginated_execution_log_list_list.py +111 -0
- root/generated/openapi_aclient/models/paginated_judge_list_list.py +111 -0
- root/generated/openapi_aclient/models/paginated_model_list_list.py +111 -0
- root/generated/openapi_aclient/models/paginated_objective_list.py +111 -0
- root/generated/openapi_aclient/models/paginated_objective_list_list.py +111 -0
- root/generated/openapi_aclient/models/patched_evaluator_request.py +194 -0
- root/generated/openapi_aclient/models/patched_judge_request.py +110 -0
- root/generated/openapi_aclient/models/patched_model_request.py +118 -0
- root/generated/openapi_aclient/models/patched_objective_request.py +113 -0
- root/generated/openapi_aclient/models/provider.py +99 -0
- root/generated/openapi_aclient/models/reference_variable.py +123 -0
- root/generated/openapi_aclient/models/reference_variable_request.py +83 -0
- root/generated/openapi_aclient/models/skill_execution_validator_result.py +130 -0
- root/generated/openapi_aclient/models/skill_test_data_request.py +107 -0
- root/generated/openapi_aclient/models/skill_test_data_request_dataset_range.py +93 -0
- root/generated/openapi_aclient/models/skill_test_input_request.py +171 -0
- root/generated/openapi_aclient/models/skill_type_enum.py +36 -0
- root/generated/openapi_aclient/models/status_change.py +84 -0
- root/generated/openapi_aclient/models/status_change_request.py +84 -0
- root/generated/openapi_aclient/models/status_change_status_enum.py +36 -0
- root/generated/openapi_aclient/models/status_enum.py +38 -0
- root/generated/openapi_aclient/models/validation_result_status.py +36 -0
- root/generated/openapi_aclient/models/visibility_enum.py +38 -0
- root/generated/openapi_aclient/rest.py +166 -0
- root/generated/openapi_aclient_README.md +239 -0
- root/generated/openapi_client/__init__.py +1 -0
- root/generated/openapi_client/api/__init__.py +1 -0
- root/generated/openapi_client/api/datasets_api.py +1274 -0
- root/generated/openapi_client/api/evaluators_api.py +3641 -0
- root/generated/openapi_client/api/execution_logs_api.py +751 -0
- root/generated/openapi_client/api/judges_api.py +3794 -0
- root/generated/openapi_client/api/models_api.py +1473 -0
- root/generated/openapi_client/api/objectives_api.py +1767 -0
- root/generated/openapi_client/api_client.py +659 -0
- root/generated/openapi_client/api_response.py +22 -0
- root/generated/openapi_client/configuration.py +474 -0
- root/generated/openapi_client/exceptions.py +197 -0
- root/generated/openapi_client/models/__init__.py +120 -0
- root/generated/openapi_client/models/data_set_create.py +118 -0
- root/generated/openapi_client/models/data_set_create_request.py +105 -0
- root/generated/openapi_client/models/data_set_list.py +129 -0
- root/generated/openapi_client/models/data_set_type.py +36 -0
- root/generated/openapi_client/models/dataset_range_request.py +93 -0
- root/generated/openapi_client/models/evaluator.py +273 -0
- root/generated/openapi_client/models/evaluator_calibration_output.py +101 -0
- root/generated/openapi_client/models/evaluator_calibration_result.py +134 -0
- root/generated/openapi_client/models/evaluator_demonstrations.py +107 -0
- root/generated/openapi_client/models/evaluator_demonstrations_request.py +107 -0
- root/generated/openapi_client/models/evaluator_execution_function_parameter_property_request.py +86 -0
- root/generated/openapi_client/models/evaluator_execution_function_parameter_request.py +109 -0
- root/generated/openapi_client/models/evaluator_execution_function_parameter_type_enum.py +35 -0
- root/generated/openapi_client/models/evaluator_execution_function_request.py +99 -0
- root/generated/openapi_client/models/evaluator_execution_functions_request.py +98 -0
- root/generated/openapi_client/models/evaluator_execution_functions_type_enum.py +35 -0
- root/generated/openapi_client/models/evaluator_execution_request.py +134 -0
- root/generated/openapi_client/models/evaluator_execution_result.py +114 -0
- root/generated/openapi_client/models/evaluator_inputs_value.py +100 -0
- root/generated/openapi_client/models/evaluator_inputs_value_items.py +89 -0
- root/generated/openapi_client/models/evaluator_list_output.py +198 -0
- root/generated/openapi_client/models/evaluator_reference.py +90 -0
- root/generated/openapi_client/models/evaluator_reference_request.py +90 -0
- root/generated/openapi_client/models/evaluator_request.py +194 -0
- root/generated/openapi_client/models/evaluator_result.py +110 -0
- root/generated/openapi_client/models/execution_log_details.py +291 -0
- root/generated/openapi_client/models/execution_log_details_evaluation_context.py +83 -0
- root/generated/openapi_client/models/execution_log_details_evaluator_latencies_inner.py +83 -0
- root/generated/openapi_client/models/execution_log_list.py +215 -0
- root/generated/openapi_client/models/execution_log_list_evaluation_context.py +83 -0
- root/generated/openapi_client/models/generation_model_params_request.py +93 -0
- root/generated/openapi_client/models/id.py +87 -0
- root/generated/openapi_client/models/input_variable.py +121 -0
- root/generated/openapi_client/models/input_variable_request.py +82 -0
- root/generated/openapi_client/models/judge.py +178 -0
- root/generated/openapi_client/models/judge_execution_request.py +114 -0
- root/generated/openapi_client/models/judge_execution_response.py +97 -0
- root/generated/openapi_client/models/judge_files_inner.py +84 -0
- root/generated/openapi_client/models/judge_generator_request.py +142 -0
- root/generated/openapi_client/models/judge_generator_response.py +88 -0
- root/generated/openapi_client/models/judge_invite_request.py +87 -0
- root/generated/openapi_client/models/judge_list.py +156 -0
- root/generated/openapi_client/models/judge_rectifier_request_request.py +114 -0
- root/generated/openapi_client/models/judge_rectifier_response.py +121 -0
- root/generated/openapi_client/models/judge_request.py +108 -0
- root/generated/openapi_client/models/model.py +126 -0
- root/generated/openapi_client/models/model_list.py +115 -0
- root/generated/openapi_client/models/model_params.py +89 -0
- root/generated/openapi_client/models/model_params_request.py +89 -0
- root/generated/openapi_client/models/model_request.py +118 -0
- root/generated/openapi_client/models/nested_evaluator.py +110 -0
- root/generated/openapi_client/models/nested_evaluator_objective.py +87 -0
- root/generated/openapi_client/models/nested_evaluator_request.py +92 -0
- root/generated/openapi_client/models/nested_objective_evaluator.py +105 -0
- root/generated/openapi_client/models/nested_objective_evaluator_request.py +92 -0
- root/generated/openapi_client/models/nested_objective_list.py +111 -0
- root/generated/openapi_client/models/nested_user_details.py +88 -0
- root/generated/openapi_client/models/nested_user_details_request.py +82 -0
- root/generated/openapi_client/models/nested_vector_objective.py +88 -0
- root/generated/openapi_client/models/nested_vector_objective_request.py +82 -0
- root/generated/openapi_client/models/objective.py +157 -0
- root/generated/openapi_client/models/objective_list.py +128 -0
- root/generated/openapi_client/models/objective_request.py +113 -0
- root/generated/openapi_client/models/objective_validator.py +100 -0
- root/generated/openapi_client/models/objective_validator_request.py +90 -0
- root/generated/openapi_client/models/paginated_data_set_list_list.py +111 -0
- root/generated/openapi_client/models/paginated_evaluator_list.py +111 -0
- root/generated/openapi_client/models/paginated_evaluator_list_output_list.py +111 -0
- root/generated/openapi_client/models/paginated_execution_log_list_list.py +111 -0
- root/generated/openapi_client/models/paginated_judge_list_list.py +111 -0
- root/generated/openapi_client/models/paginated_model_list_list.py +111 -0
- root/generated/openapi_client/models/paginated_objective_list.py +111 -0
- root/generated/openapi_client/models/paginated_objective_list_list.py +111 -0
- root/generated/openapi_client/models/patched_evaluator_request.py +194 -0
- root/generated/openapi_client/models/patched_judge_request.py +110 -0
- root/generated/openapi_client/models/patched_model_request.py +118 -0
- root/generated/openapi_client/models/patched_objective_request.py +113 -0
- root/generated/openapi_client/models/provider.py +99 -0
- root/generated/openapi_client/models/reference_variable.py +123 -0
- root/generated/openapi_client/models/reference_variable_request.py +83 -0
- root/generated/openapi_client/models/skill_execution_validator_result.py +130 -0
- root/generated/openapi_client/models/skill_test_data_request.py +107 -0
- root/generated/openapi_client/models/skill_test_data_request_dataset_range.py +93 -0
- root/generated/openapi_client/models/skill_test_input_request.py +171 -0
- root/generated/openapi_client/models/skill_type_enum.py +36 -0
- root/generated/openapi_client/models/status_change.py +84 -0
- root/generated/openapi_client/models/status_change_request.py +84 -0
- root/generated/openapi_client/models/status_change_status_enum.py +36 -0
- root/generated/openapi_client/models/status_enum.py +38 -0
- root/generated/openapi_client/models/validation_result_status.py +36 -0
- root/generated/openapi_client/models/visibility_enum.py +38 -0
- root/generated/openapi_client/rest.py +203 -0
- root/generated/openapi_client_README.md +238 -0
- root/judges.py +681 -0
- root/models.py +197 -0
- root/objectives.py +343 -0
- root/py.typed +0 -0
- root/skills.py +1707 -0
- root/utils.py +90 -0
- scorable-1.6.4.dist-info/METADATA +395 -0
- scorable-1.6.4.dist-info/RECORD +228 -0
- scorable-1.6.4.dist-info/WHEEL +4 -0
- scorable-1.6.4.dist-info/licenses/LICENSE +202 -0
root/judges.py
ADDED
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextlib import AbstractAsyncContextManager
|
|
4
|
+
from functools import partial
|
|
5
|
+
from typing import AsyncIterator, Dict, Iterator, List, Literal, Optional, Union, cast
|
|
6
|
+
|
|
7
|
+
from pydantic import StrictStr
|
|
8
|
+
|
|
9
|
+
from root.generated.openapi_aclient.models.judge_generator_request import (
|
|
10
|
+
JudgeGeneratorRequest as AJudgeGeneratorRequest,
|
|
11
|
+
)
|
|
12
|
+
from root.generated.openapi_aclient.models.judge_generator_response import (
|
|
13
|
+
JudgeGeneratorResponse as AJudgeGeneratorResponse,
|
|
14
|
+
)
|
|
15
|
+
from root.generated.openapi_aclient.models.judge_request import (
|
|
16
|
+
JudgeRequest as AJudgeRequest,
|
|
17
|
+
)
|
|
18
|
+
from root.generated.openapi_aclient.models.status_enum import (
|
|
19
|
+
StatusEnum as AStatusEnum,
|
|
20
|
+
)
|
|
21
|
+
from root.generated.openapi_aclient.models.visibility_enum import VisibilityEnum as AVisibilityEnum
|
|
22
|
+
from root.generated.openapi_client.models.judge_generator_request import JudgeGeneratorRequest
|
|
23
|
+
from root.generated.openapi_client.models.judge_generator_response import JudgeGeneratorResponse
|
|
24
|
+
from root.generated.openapi_client.models.judge_request import JudgeRequest
|
|
25
|
+
from root.generated.openapi_client.models.status_enum import StatusEnum
|
|
26
|
+
from root.generated.openapi_client.models.visibility_enum import VisibilityEnum
|
|
27
|
+
|
|
28
|
+
from .generated.openapi_aclient import ApiClient as AApiClient
|
|
29
|
+
from .generated.openapi_aclient.api.judges_api import JudgesApi as AJudgesApi
|
|
30
|
+
from .generated.openapi_aclient.models.evaluator_execution_functions_request import (
|
|
31
|
+
EvaluatorExecutionFunctionsRequest as AEvaluatorExecutionFunctionsRequest,
|
|
32
|
+
)
|
|
33
|
+
from .generated.openapi_aclient.models.evaluator_reference_request import (
|
|
34
|
+
EvaluatorReferenceRequest as AEvaluatorReferenceRequest,
|
|
35
|
+
)
|
|
36
|
+
from .generated.openapi_aclient.models.judge import Judge as AOpenApiJudge
|
|
37
|
+
from .generated.openapi_aclient.models.judge_execution_request import (
|
|
38
|
+
JudgeExecutionRequest as AJudgeExecutionRequest,
|
|
39
|
+
)
|
|
40
|
+
from .generated.openapi_aclient.models.judge_execution_response import (
|
|
41
|
+
JudgeExecutionResponse as AJudgeExecutionResponse,
|
|
42
|
+
)
|
|
43
|
+
from .generated.openapi_aclient.models.judge_list import JudgeList as AJudgeList
|
|
44
|
+
from .generated.openapi_aclient.models.paginated_judge_list_list import (
|
|
45
|
+
PaginatedJudgeListList as APaginatedJudgeListList,
|
|
46
|
+
)
|
|
47
|
+
from .generated.openapi_aclient.models.patched_judge_request import (
|
|
48
|
+
PatchedJudgeRequest as APatchedJudgeRequest,
|
|
49
|
+
)
|
|
50
|
+
from .generated.openapi_client import ApiClient
|
|
51
|
+
from .generated.openapi_client.api.judges_api import JudgesApi
|
|
52
|
+
from .generated.openapi_client.models.evaluator_execution_functions_request import (
|
|
53
|
+
EvaluatorExecutionFunctionsRequest,
|
|
54
|
+
)
|
|
55
|
+
from .generated.openapi_client.models.evaluator_reference_request import EvaluatorReferenceRequest
|
|
56
|
+
from .generated.openapi_client.models.judge import Judge as OpenApiJudge
|
|
57
|
+
from .generated.openapi_client.models.judge_execution_request import JudgeExecutionRequest
|
|
58
|
+
from .generated.openapi_client.models.judge_execution_response import JudgeExecutionResponse
|
|
59
|
+
from .generated.openapi_client.models.judge_list import JudgeList
|
|
60
|
+
from .generated.openapi_client.models.paginated_judge_list_list import PaginatedJudgeListList
|
|
61
|
+
from .generated.openapi_client.models.patched_judge_request import PatchedJudgeRequest
|
|
62
|
+
from .utils import ClientContextCallable, with_async_client, with_sync_client
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class Judge(OpenApiJudge):
    """A single judge that can be executed synchronously.

    The data attributes come from the automatically generated superclass;
    consult its documentation for the full list.
    """

    # Stored by ``_wrap``; presumably consumed by ``with_sync_client`` to
    # obtain an API client -- confirm in ``utils``.
    client_context: ClientContextCallable

    @classmethod
    def _wrap(
        cls,
        apiobj: Union[OpenApiJudge, JudgeList],
        client_context: ClientContextCallable,
    ) -> Judge:
        """Convert a generated API object into a ``Judge`` in place.

        Args:
            apiobj: Generated judge object (detail or list item) to convert.
            client_context: Value stored on the result as ``client_context``.

        Returns:
            The object that was passed in, now an instance of ``cls``.

        Raises:
            ValueError: If ``apiobj`` is neither an ``OpenApiJudge`` nor a
                ``JudgeList``.
        """
        if isinstance(apiobj, (OpenApiJudge, JudgeList)):
            wrapped = cast(Judge, apiobj)
            # Re-class the existing instance rather than copying its fields.
            wrapped.__class__ = cls
            wrapped.client_context = client_context
            return wrapped
        raise ValueError(f"Wrong instance in _wrap: {apiobj!r}")

    @with_sync_client
    def run(
        self,
        *,
        response: str,
        request: Optional[str] = None,
        contexts: Optional[List[str]] = None,
        functions: Optional[List[EvaluatorExecutionFunctionsRequest]] = None,
        expected_output: Optional[str] = None,
        tags: Optional[List[str]] = None,
        _request_timeout: Optional[int] = None,
        _client: ApiClient,
    ) -> JudgeExecutionResponse:
        """Execute this judge against an LLM response.

        Args:
            response: LLM output to evaluate.
            request: The prompt that was sent to the LLM, if any.
            contexts: Optional documents passed to RAG evaluators.
            functions: Optional functions to execute.
            expected_output: Optional expected output.
            tags: Optional tags to attach to the judge execution.
            _request_timeout: Optional timeout forwarded to the API call.
            _client: API client used for the call (presumably injected by
                ``with_sync_client`` -- confirm in ``utils``).

        Returns:
            The execution response returned by the judges API.
        """
        judges_api = JudgesApi(_client)
        payload = JudgeExecutionRequest(
            response=response,
            request=request,
            contexts=contexts,
            functions=functions,
            expected_output=expected_output,
            tags=tags,
        )
        result = judges_api.judges_execute_create(
            judge_id=self.id,
            judge_execution_request=payload,
            _request_timeout=_request_timeout,
        )
        return result
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class AJudge(AOpenApiJudge):
    """
    Async wrapper for a single Judge.

    For available attributes, please check the (automatically
    generated) superclass documentation.
    """

    # Stored by ``_awrap``; presumably consumed by ``with_async_client`` to
    # obtain an API client -- confirm in ``utils``.
    client_context: ClientContextCallable

    @classmethod
    async def _awrap(cls, apiobj: Union[AOpenApiJudge, AJudgeList], client_context: ClientContextCallable) -> AJudge:
        """Wrap an async API object into an AJudge instance.

        The object is converted in place: its class is replaced by ``cls``
        and ``client_context`` is attached to it. The method performs no
        awaits itself but stays a coroutine because callers ``await`` it.

        Args:
            apiobj: Generated judge object (detail or list item) to convert.
            client_context: Value stored on the result as ``client_context``.

        Returns:
            The object that was passed in, now an instance of ``cls``.

        Raises:
            ValueError: If ``apiobj`` is neither an ``AOpenApiJudge`` nor an
                ``AJudgeList``.
        """
        if not isinstance(apiobj, (AOpenApiJudge, AJudgeList)):
            # Report the method that actually rejected the object; the
            # message previously named the sync ``_wrap`` by mistake.
            raise ValueError(f"Wrong instance in _awrap: {apiobj!r}")
        obj = cast(AJudge, apiobj)
        # Re-class the existing instance rather than copying its fields.
        obj.__class__ = cls
        obj.client_context = client_context
        return obj

    @with_async_client
    async def arun(
        self,
        *,
        response: str,
        request: Optional[str] = None,
        contexts: Optional[List[str]] = None,
        functions: Optional[List[AEvaluatorExecutionFunctionsRequest]] = None,
        expected_output: Optional[str] = None,
        tags: Optional[List[str]] = None,
        _request_timeout: Optional[int] = None,
        _client: AApiClient,
    ) -> AJudgeExecutionResponse:
        """
        Asynchronously run the judge.

        Args:
            response: LLM output to evaluate
            request: The prompt sent to the LLM. Optional.
            contexts: Optional documents passed to RAG evaluators
            functions: Optional functions to execute
            expected_output: Optional expected output
            tags: Optional tags to add to the judge execution
            _request_timeout: Optional timeout for the request
            _client: API client used for the call (presumably injected by
                ``with_async_client`` -- confirm in ``utils``).

        Returns:
            The execution response returned by the judges API.
        """
        api_instance = AJudgesApi(_client)
        execution_request = AJudgeExecutionRequest(
            contexts=contexts,
            functions=functions,
            expected_output=expected_output,
            request=request,
            response=response,
            tags=tags,
        )
        return await api_instance.judges_execute_create(
            judge_id=self.id,
            judge_execution_request=execution_request,
            _request_timeout=_request_timeout,
        )
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class Judges:
|
|
186
|
+
"""
|
|
187
|
+
Judges API
|
|
188
|
+
|
|
189
|
+
Note:
|
|
190
|
+
The construction of the API instance should be handled by
|
|
191
|
+
accessing an attribute of a :class:`root.client.RootSignals` instance.
|
|
192
|
+
"""
|
|
193
|
+
|
|
194
|
+
def __init__(
    self,
    client_context: ClientContextCallable,
) -> None:
    """Remember the client context for later API calls.

    Args:
        client_context: Stored unchanged on the instance as
            ``client_context``.
    """
    self.client_context = client_context
|
|
196
|
+
|
|
197
|
+
@with_sync_client
def generate(
    self,
    *,
    intent: str,
    visibility: Literal["public", "unlisted"] = "unlisted",
    stage: Optional[str] = None,
    extra_contexts: Optional[Dict[str, str | None]] = None,
    strict: bool = False,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> JudgeGeneratorResponse:
    """Generate a judge from a free-form description.

    Args:
        intent: Description of what the judge is for.
            Example: I am building a chatbot for ecommerce and I would like
            to measure the quality of the responses.
        visibility: ``"public"`` is sent as ``VisibilityEnum.GLOBAL``; any
            other value is sent as ``VisibilityEnum.UNLISTED``.
        stage: Stage of the judge, useful when the intent is ambiguous.
            Example: for a chatbot judge, "response generation".
        extra_contexts: Extra contexts to pass to the generator.
            Example: {"domain": "Ecommerce selling clothing",
            "audience": "Women aged 25-35"}
        strict: Whether generation should fail if the intent is ambiguous.
        _request_timeout: Optional timeout forwarded to the API call.
        _client: API client used for the call (presumably injected by
            ``with_sync_client`` -- confirm in ``utils``).

    Returns:
        Wrapper for the judge id and optionally an error code if the
        generation failed.
    """
    judges_api = JudgesApi(_client)
    if visibility == "public":
        requested_visibility = VisibilityEnum.GLOBAL
    else:
        requested_visibility = VisibilityEnum.UNLISTED
    generator_request = JudgeGeneratorRequest(
        intent=intent,
        stage=stage,
        extra_contexts=extra_contexts,
        strict=strict,
        visibility=requested_visibility,
    )
    return judges_api.judges_generate_create(
        judge_generator_request=generator_request,
        _request_timeout=_request_timeout,
    )
|
|
237
|
+
|
|
238
|
+
@with_async_client
async def agenerate(
    self,
    *,
    intent: str,
    visibility: Literal["public", "unlisted"] = "unlisted",
    stage: Optional[str] = None,
    extra_contexts: Optional[Dict[str, str | None]] = None,
    strict: bool = False,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudgeGeneratorResponse:
    """Asynchronously generate a judge from a free-form description.

    Args:
        intent: Description of what the judge is for.
            Example: I am building a chatbot for ecommerce and I would like
            to measure the quality of the responses.
        visibility: ``"public"`` is sent as ``AVisibilityEnum.GLOBAL``; any
            other value is sent as ``AVisibilityEnum.UNLISTED``.
        stage: Stage of the judge, useful when the intent is ambiguous.
            Example: for a chatbot judge, "response generation".
        extra_contexts: Extra contexts to pass to the generator.
            Example: {"domain": "Ecommerce selling clothing",
            "audience": "Women aged 25-35"}
        strict: Whether generation should fail if the intent is ambiguous.
        _request_timeout: Optional timeout forwarded to the API call.
        _client: Async API client used for the call (presumably injected by
            ``with_async_client`` -- confirm in ``utils``).

    Returns:
        Wrapper for the judge id and optionally an error code if the
        generation failed.
    """
    judges_api = AJudgesApi(_client)
    if visibility == "public":
        requested_visibility = AVisibilityEnum.GLOBAL
    else:
        requested_visibility = AVisibilityEnum.UNLISTED
    generator_request = AJudgeGeneratorRequest(
        intent=intent,
        stage=stage,
        extra_contexts=extra_contexts,
        strict=strict,
        visibility=requested_visibility,
    )
    generated = await judges_api.judges_generate_create(
        judge_generator_request=generator_request,
        _request_timeout=_request_timeout,
    )
    return generated
|
|
278
|
+
|
|
279
|
+
@with_sync_client
def create(
    self,
    *,
    name: str,
    intent: str,
    evaluator_references: Optional[List[EvaluatorReferenceRequest]] = None,
    stage: Optional[str] = None,
    status: Literal["unlisted", "listed", "public", "public_unlisted"] = "unlisted",
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> Judge:
    """Create a judge from a name, an intent and a list of evaluators.

    Args:
        name: Name for the judge.
        intent: Intent for the judge.
        evaluator_references: Evaluator references to include in the judge.
        stage: Stage for the judge.
        status: Status of the judge; converted with ``StatusEnum(status)``.
        _request_timeout: Optional timeout forwarded to the API call.
        _client: API client used for the call (presumably injected by
            ``with_sync_client`` -- confirm in ``utils``).

    Returns:
        The created judge, wrapped as a ``Judge`` that carries this
        object's ``client_context``.
    """
    judges_api = JudgesApi(_client)
    judge_request = JudgeRequest(
        name=name,
        intent=intent,
        evaluator_references=evaluator_references,
        stage=stage,
        status=StatusEnum(status),
    )
    created = judges_api.judges_create(
        judge_request=judge_request,
        _request_timeout=_request_timeout,
    )
    return Judge._wrap(created, client_context=self.client_context)
|
|
314
|
+
|
|
315
|
+
@with_async_client
async def acreate(
    self,
    *,
    name: str,
    intent: str,
    evaluator_references: Optional[List[AEvaluatorReferenceRequest]] = None,
    stage: Optional[str] = None,
    status: Literal["unlisted", "listed", "public", "public_unlisted"] = "unlisted",
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudge:
    """Asynchronously create a judge from a name, an intent and evaluators.

    Args:
        name: Name for the judge.
        intent: Intent for the judge.
        evaluator_references: Evaluator references to include in the judge.
        stage: Stage for the judge.
        status: Status of the judge; converted with ``AStatusEnum(status)``.
        _request_timeout: Optional timeout forwarded to the API call.
        _client: Async API client used for the call (presumably injected by
            ``with_async_client`` -- confirm in ``utils``).

    Returns:
        The created judge, wrapped as an ``AJudge`` that carries this
        object's ``client_context``.
    """
    judges_api = AJudgesApi(_client)
    judge_request = AJudgeRequest(
        name=name,
        intent=intent,
        evaluator_references=evaluator_references,
        stage=stage,
        status=AStatusEnum(status),
    )
    created = await judges_api.judges_create(
        judge_request=judge_request,
        _request_timeout=_request_timeout,
    )
    wrapped = await AJudge._awrap(created, client_context=self.client_context)
    return wrapped
|
|
350
|
+
|
|
351
|
+
@with_sync_client
def get(
    self,
    judge_id: str,
    *,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> Judge:
    """Fetch a single judge by its ID.

    Args:
        judge_id: ID of the judge to fetch.
        _request_timeout: Optional timeout forwarded to the API call.
        _client: API client used for the call (presumably injected by
            ``with_sync_client`` -- confirm in ``utils``).

    Returns:
        The retrieved judge, wrapped as a ``Judge`` that carries this
        object's ``client_context``.
    """
    judges_api = JudgesApi(_client)
    fetched = judges_api.judges_retrieve(id=judge_id, _request_timeout=_request_timeout)
    return Judge._wrap(fetched, client_context=self.client_context)
|
|
364
|
+
|
|
365
|
+
@with_async_client
async def aget(self, judge_id: str, *, _request_timeout: Optional[int] = None, _client: AApiClient) -> AJudge:
    """
    Asynchronously fetch a single judge by its ID.

    Args:
      judge_id: ID of the judge to fetch.
      _request_timeout: Optional timeout forwarded to the API call.

    Returns:
      The retrieved judge, wrapped with this object's client context.
    """
    judges_api = AJudgesApi(_client)
    fetched = await judges_api.judges_retrieve(id=judge_id, _request_timeout=_request_timeout)
    return await AJudge._awrap(fetched, client_context=self.client_context)
|
|
378
|
+
|
|
379
|
+
@with_sync_client
def delete(self, judge_id: str, *, _request_timeout: Optional[int] = None, _client: ApiClient) -> None:
    """
    Delete a judge by its ID.

    Args:
      judge_id: ID of the judge to delete.
      _request_timeout: Optional timeout forwarded to the API call.
    """
    judges_api = JudgesApi(_client)
    # The result of the destroy call is passed straight back to the caller.
    outcome = judges_api.judges_destroy(id=judge_id, _request_timeout=_request_timeout)
    return outcome
|
|
389
|
+
|
|
390
|
+
@with_async_client
async def adelete(self, judge_id: str, *, _request_timeout: Optional[int] = None, _client: AApiClient) -> None:
    """
    Asynchronously delete a judge by its ID.

    Args:
      judge_id: ID of the judge to delete.
      _request_timeout: Optional timeout forwarded to the API call.
    """
    judges_api = AJudgesApi(_client)
    # The awaited result of the destroy call is passed straight back to the caller.
    outcome = await judges_api.judges_destroy(id=judge_id, _request_timeout=_request_timeout)
    return outcome
|
|
400
|
+
|
|
401
|
+
@with_sync_client
def list(
    self,
    *,
    limit: int = 100,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> Iterator[Judge]:
    """
    Iterate through the judges, fetching further pages as needed.

    Args:
      limit: Number of entries to iterate through at most.
      _request_timeout: Optional timeout applied to each page request.

    Yields:
      Judges wrapped with this object's client context, at most ``limit`` of them.
    """
    api_instance = JudgesApi(_client)
    cursor: Optional[StrictStr] = None
    remaining = limit
    while remaining > 0:
        # Ask only for as many entries as are still wanted.
        page: PaginatedJudgeListList = api_instance.judges_list(
            page_size=remaining,
            cursor=cursor,
            _request_timeout=_request_timeout,
        )
        if not page.results:
            return

        # Trim the page in case it holds more entries than are still wanted.
        batch = page.results[:remaining]
        remaining -= len(batch)
        for judge in batch:
            yield Judge._wrap(judge, client_context=self.client_context)

        # An empty `next` value means there are no further pages.
        cursor = page.next
        if not cursor:
            return
|
|
423
|
+
|
|
424
|
+
async def alist(self, *, limit: int = 100, _request_timeout: Optional[int] = None) -> AsyncIterator[AJudge]:
    """
    Asynchronously iterate through the judges, fetching further pages as needed.

    Args:
      limit: Number of entries to iterate through at most.
      _request_timeout: Optional timeout applied to each page request.

    Yields:
      Judges wrapped with this object's client context, at most ``limit`` of them.
    """
    context = self.client_context()
    assert isinstance(context, AbstractAsyncContextManager), "This method is not available in synchronous mode"
    async with context as client:
        api_instance = AJudgesApi(client)

        cursor: Optional[StrictStr] = None
        remaining = limit
        while remaining > 0:
            # Ask only for as many entries as are still wanted.
            page: APaginatedJudgeListList = await api_instance.judges_list(
                page_size=remaining,
                cursor=cursor,
                _request_timeout=_request_timeout,
            )
            if not page.results:
                return

            # Trim the page in case it holds more entries than are still wanted.
            batch = page.results[:remaining]
            remaining -= len(batch)
            for judge in batch:
                yield await AJudge._awrap(judge, client_context=self.client_context)

            # An empty `next` value means there are no further pages.
            cursor = page.next
            if not cursor:
                return
|
|
450
|
+
|
|
451
|
+
@with_sync_client
def update(
    self,
    judge_id: str,
    *,
    name: Optional[str] = None,
    evaluator_references: Optional[List[EvaluatorReferenceRequest]] = None,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> Judge:
    """
    Partially update an existing judge.

    Args:
      judge_id: ID of the judge to update.
      name: New name for the judge.
      evaluator_references: New list of evaluator references.
      _request_timeout: Optional timeout forwarded to the API call.

    Returns:
      The updated judge, wrapped with this object's client context.
    """
    judges_api = JudgesApi(_client)
    patch = PatchedJudgeRequest(name=name, evaluator_references=evaluator_references)
    updated = judges_api.judges_partial_update(
        id=judge_id,
        patched_judge_request=patch,
        _request_timeout=_request_timeout,
    )
    return Judge._wrap(updated, client_context=self.client_context)
|
|
482
|
+
|
|
483
|
+
@with_async_client
async def aupdate(
    self,
    judge_id: str,
    *,
    name: Optional[str] = None,
    evaluator_references: Optional[List[AEvaluatorReferenceRequest]] = None,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudge:
    """
    Asynchronously apply a partial update to an existing judge.

    Args:
      judge_id: ID of the judge to update.
      name: New name for the judge.
      evaluator_references: New list of evaluator references.
      _request_timeout: Optional timeout forwarded to the API call.

    Returns:
      The updated judge, wrapped with this object's client context.
    """
    judges_api = AJudgesApi(_client)
    patch = APatchedJudgeRequest(name=name, evaluator_references=evaluator_references)
    updated = await judges_api.judges_partial_update(
        id=judge_id,
        patched_judge_request=patch,
        _request_timeout=_request_timeout,
    )
    return await AJudge._awrap(updated, client_context=self.client_context)
|
|
514
|
+
|
|
515
|
+
@with_sync_client
def run(
    self,
    judge_id: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[EvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> JudgeExecutionResponse:
    """
    Execute a judge identified by its ID.

    Args:
      judge_id: ID of the judge to run.
      response: LLM output to evaluate.
      request: The prompt sent to the LLM. Optional.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags to add to the judge execution.
      _request_timeout: Optional timeout forwarded to the API call.

    Returns:
      The execution response returned by the API.
    """
    judges_api = JudgesApi(_client)
    payload = JudgeExecutionRequest(
        request=request,
        response=response,
        contexts=contexts,
        functions=functions,
        expected_output=expected_output,
        tags=tags,
    )
    outcome = judges_api.judges_execute_create(
        judge_id=judge_id,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return outcome
|
|
556
|
+
|
|
557
|
+
@with_async_client
async def arun(
    self,
    judge_id: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[AEvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudgeExecutionResponse:
    """
    Asynchronously execute a judge identified by its ID.

    Args:
      judge_id: ID of the judge to run.
      response: LLM output to evaluate.
      request: The prompt sent to the LLM. Optional.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags to add to the judge execution.
      _request_timeout: Optional timeout forwarded to the API call.

    Returns:
      The execution response returned by the API.
    """
    judges_api = AJudgesApi(_client)
    payload = AJudgeExecutionRequest(
        contexts=contexts,
        functions=functions,
        expected_output=expected_output,
        request=request,
        response=response,
        tags=tags,
    )
    outcome = await judges_api.judges_execute_create(
        judge_id=judge_id,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return outcome
|
|
598
|
+
|
|
599
|
+
@with_sync_client
def run_by_name(
    self,
    name: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[EvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: ApiClient,
) -> JudgeExecutionResponse:
    """
    Execute a judge identified by its name.

    Args:
      name: Name of the judge to run.
      response: LLM output to evaluate.
      request: The prompt sent to the LLM. Optional.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags to add to the judge execution.
      _request_timeout: Optional timeout forwarded to the API call.

    Returns:
      The execution response returned by the API.
    """
    judges_api = JudgesApi(_client)
    payload = JudgeExecutionRequest(
        request=request,
        response=response,
        contexts=contexts,
        functions=functions,
        expected_output=expected_output,
        tags=tags,
    )
    outcome = judges_api.judges_execute_by_name_create(
        name=name,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return outcome
|
|
640
|
+
|
|
641
|
+
@with_async_client
async def arun_by_name(
    self,
    name: str,
    *,
    response: str,
    request: Optional[str] = None,
    contexts: Optional[List[str]] = None,
    functions: Optional[List[AEvaluatorExecutionFunctionsRequest]] = None,
    expected_output: Optional[str] = None,
    tags: Optional[List[str]] = None,
    _request_timeout: Optional[int] = None,
    _client: AApiClient,
) -> AJudgeExecutionResponse:
    """
    Asynchronously execute a judge identified by its name.

    Args:
      name: Name of the judge to run.
      response: LLM output to evaluate.
      request: The prompt sent to the LLM. Optional.
      contexts: Optional documents passed to RAG evaluators.
      functions: Optional functions to execute.
      expected_output: Optional expected output.
      tags: Optional tags to add to the judge execution.
      _request_timeout: Optional timeout forwarded to the API call.

    Returns:
      The execution response returned by the API.
    """
    judges_api = AJudgesApi(_client)
    payload = AJudgeExecutionRequest(
        contexts=contexts,
        functions=functions,
        expected_output=expected_output,
        request=request,
        response=response,
        tags=tags,
    )
    outcome = await judges_api.judges_execute_by_name_create(
        name=name,
        judge_execution_request=payload,
        _request_timeout=_request_timeout,
    )
    return outcome
|