judgeval 0.1.0__py3-none-any.whl → 0.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- judgeval/__init__.py +173 -10
- judgeval/api/__init__.py +523 -0
- judgeval/api/api_types.py +413 -0
- judgeval/cli.py +112 -0
- judgeval/constants.py +7 -30
- judgeval/data/__init__.py +1 -3
- judgeval/data/evaluation_run.py +125 -0
- judgeval/data/example.py +14 -40
- judgeval/data/judgment_types.py +396 -146
- judgeval/data/result.py +11 -18
- judgeval/data/scorer_data.py +3 -26
- judgeval/data/scripts/openapi_transform.py +5 -5
- judgeval/data/trace.py +115 -194
- judgeval/dataset/__init__.py +335 -0
- judgeval/env.py +55 -0
- judgeval/evaluation/__init__.py +346 -0
- judgeval/exceptions.py +28 -0
- judgeval/integrations/langgraph/__init__.py +13 -0
- judgeval/integrations/openlit/__init__.py +51 -0
- judgeval/judges/__init__.py +2 -2
- judgeval/judges/litellm_judge.py +77 -16
- judgeval/judges/together_judge.py +88 -17
- judgeval/judges/utils.py +7 -20
- judgeval/judgment_attribute_keys.py +55 -0
- judgeval/{common/logger.py → logger.py} +24 -8
- judgeval/prompt/__init__.py +330 -0
- judgeval/scorers/__init__.py +11 -11
- judgeval/scorers/agent_scorer.py +15 -19
- judgeval/scorers/api_scorer.py +21 -23
- judgeval/scorers/base_scorer.py +54 -36
- judgeval/scorers/example_scorer.py +1 -3
- judgeval/scorers/judgeval_scorers/api_scorers/__init__.py +2 -24
- judgeval/scorers/judgeval_scorers/api_scorers/answer_correctness.py +2 -10
- judgeval/scorers/judgeval_scorers/api_scorers/answer_relevancy.py +2 -2
- judgeval/scorers/judgeval_scorers/api_scorers/faithfulness.py +2 -10
- judgeval/scorers/judgeval_scorers/api_scorers/instruction_adherence.py +2 -14
- judgeval/scorers/judgeval_scorers/api_scorers/prompt_scorer.py +171 -59
- judgeval/scorers/score.py +64 -47
- judgeval/scorers/utils.py +2 -107
- judgeval/tracer/__init__.py +1111 -2
- judgeval/tracer/constants.py +1 -0
- judgeval/tracer/exporters/__init__.py +40 -0
- judgeval/tracer/exporters/s3.py +119 -0
- judgeval/tracer/exporters/store.py +59 -0
- judgeval/tracer/exporters/utils.py +32 -0
- judgeval/tracer/keys.py +63 -0
- judgeval/tracer/llm/__init__.py +7 -0
- judgeval/tracer/llm/config.py +78 -0
- judgeval/tracer/llm/constants.py +9 -0
- judgeval/tracer/llm/llm_anthropic/__init__.py +3 -0
- judgeval/tracer/llm/llm_anthropic/config.py +6 -0
- judgeval/tracer/llm/llm_anthropic/messages.py +452 -0
- judgeval/tracer/llm/llm_anthropic/messages_stream.py +322 -0
- judgeval/tracer/llm/llm_anthropic/wrapper.py +59 -0
- judgeval/tracer/llm/llm_google/__init__.py +3 -0
- judgeval/tracer/llm/llm_google/config.py +6 -0
- judgeval/tracer/llm/llm_google/generate_content.py +127 -0
- judgeval/tracer/llm/llm_google/wrapper.py +30 -0
- judgeval/tracer/llm/llm_openai/__init__.py +3 -0
- judgeval/tracer/llm/llm_openai/beta_chat_completions.py +216 -0
- judgeval/tracer/llm/llm_openai/chat_completions.py +501 -0
- judgeval/tracer/llm/llm_openai/config.py +6 -0
- judgeval/tracer/llm/llm_openai/responses.py +506 -0
- judgeval/tracer/llm/llm_openai/utils.py +42 -0
- judgeval/tracer/llm/llm_openai/wrapper.py +63 -0
- judgeval/tracer/llm/llm_together/__init__.py +3 -0
- judgeval/tracer/llm/llm_together/chat_completions.py +406 -0
- judgeval/tracer/llm/llm_together/config.py +6 -0
- judgeval/tracer/llm/llm_together/wrapper.py +52 -0
- judgeval/tracer/llm/providers.py +19 -0
- judgeval/tracer/managers.py +167 -0
- judgeval/tracer/processors/__init__.py +220 -0
- judgeval/tracer/utils.py +19 -0
- judgeval/trainer/__init__.py +14 -0
- judgeval/trainer/base_trainer.py +122 -0
- judgeval/trainer/config.py +123 -0
- judgeval/trainer/console.py +144 -0
- judgeval/trainer/fireworks_trainer.py +392 -0
- judgeval/trainer/trainable_model.py +252 -0
- judgeval/trainer/trainer.py +70 -0
- judgeval/utils/async_utils.py +39 -0
- judgeval/utils/decorators/__init__.py +0 -0
- judgeval/utils/decorators/dont_throw.py +37 -0
- judgeval/utils/decorators/use_once.py +13 -0
- judgeval/utils/file_utils.py +74 -28
- judgeval/utils/guards.py +36 -0
- judgeval/utils/meta.py +27 -0
- judgeval/utils/project.py +15 -0
- judgeval/utils/serialize.py +253 -0
- judgeval/utils/testing.py +70 -0
- judgeval/utils/url.py +10 -0
- judgeval/{version_check.py → utils/version_check.py} +5 -3
- judgeval/utils/wrappers/README.md +3 -0
- judgeval/utils/wrappers/__init__.py +15 -0
- judgeval/utils/wrappers/immutable_wrap_async.py +74 -0
- judgeval/utils/wrappers/immutable_wrap_async_iterator.py +84 -0
- judgeval/utils/wrappers/immutable_wrap_sync.py +66 -0
- judgeval/utils/wrappers/immutable_wrap_sync_iterator.py +84 -0
- judgeval/utils/wrappers/mutable_wrap_async.py +67 -0
- judgeval/utils/wrappers/mutable_wrap_sync.py +67 -0
- judgeval/utils/wrappers/py.typed +0 -0
- judgeval/utils/wrappers/utils.py +35 -0
- judgeval/v1/__init__.py +88 -0
- judgeval/v1/data/__init__.py +7 -0
- judgeval/v1/data/example.py +44 -0
- judgeval/v1/data/scorer_data.py +42 -0
- judgeval/v1/data/scoring_result.py +44 -0
- judgeval/v1/datasets/__init__.py +6 -0
- judgeval/v1/datasets/dataset.py +214 -0
- judgeval/v1/datasets/dataset_factory.py +94 -0
- judgeval/v1/evaluation/__init__.py +6 -0
- judgeval/v1/evaluation/evaluation.py +182 -0
- judgeval/v1/evaluation/evaluation_factory.py +17 -0
- judgeval/v1/instrumentation/__init__.py +6 -0
- judgeval/v1/instrumentation/llm/__init__.py +7 -0
- judgeval/v1/instrumentation/llm/config.py +78 -0
- judgeval/v1/instrumentation/llm/constants.py +11 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/messages.py +414 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/messages_stream.py +307 -0
- judgeval/v1/instrumentation/llm/llm_anthropic/wrapper.py +61 -0
- judgeval/v1/instrumentation/llm/llm_google/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_google/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_google/generate_content.py +121 -0
- judgeval/v1/instrumentation/llm/llm_google/wrapper.py +30 -0
- judgeval/v1/instrumentation/llm/llm_openai/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_openai/beta_chat_completions.py +212 -0
- judgeval/v1/instrumentation/llm/llm_openai/chat_completions.py +477 -0
- judgeval/v1/instrumentation/llm/llm_openai/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_openai/responses.py +472 -0
- judgeval/v1/instrumentation/llm/llm_openai/utils.py +41 -0
- judgeval/v1/instrumentation/llm/llm_openai/wrapper.py +63 -0
- judgeval/v1/instrumentation/llm/llm_together/__init__.py +5 -0
- judgeval/v1/instrumentation/llm/llm_together/chat_completions.py +382 -0
- judgeval/v1/instrumentation/llm/llm_together/config.py +6 -0
- judgeval/v1/instrumentation/llm/llm_together/wrapper.py +57 -0
- judgeval/v1/instrumentation/llm/providers.py +19 -0
- judgeval/v1/integrations/claude_agent_sdk/__init__.py +119 -0
- judgeval/v1/integrations/claude_agent_sdk/wrapper.py +564 -0
- judgeval/v1/integrations/langgraph/__init__.py +13 -0
- judgeval/v1/integrations/openlit/__init__.py +47 -0
- judgeval/v1/internal/api/__init__.py +525 -0
- judgeval/v1/internal/api/api_types.py +413 -0
- judgeval/v1/prompts/__init__.py +6 -0
- judgeval/v1/prompts/prompt.py +29 -0
- judgeval/v1/prompts/prompt_factory.py +189 -0
- judgeval/v1/py.typed +0 -0
- judgeval/v1/scorers/__init__.py +6 -0
- judgeval/v1/scorers/api_scorer.py +82 -0
- judgeval/v1/scorers/base_scorer.py +17 -0
- judgeval/v1/scorers/built_in/__init__.py +17 -0
- judgeval/v1/scorers/built_in/answer_correctness.py +28 -0
- judgeval/v1/scorers/built_in/answer_relevancy.py +28 -0
- judgeval/v1/scorers/built_in/built_in_factory.py +26 -0
- judgeval/v1/scorers/built_in/faithfulness.py +28 -0
- judgeval/v1/scorers/built_in/instruction_adherence.py +28 -0
- judgeval/v1/scorers/custom_scorer/__init__.py +6 -0
- judgeval/v1/scorers/custom_scorer/custom_scorer.py +50 -0
- judgeval/v1/scorers/custom_scorer/custom_scorer_factory.py +16 -0
- judgeval/v1/scorers/prompt_scorer/__init__.py +6 -0
- judgeval/v1/scorers/prompt_scorer/prompt_scorer.py +86 -0
- judgeval/v1/scorers/prompt_scorer/prompt_scorer_factory.py +85 -0
- judgeval/v1/scorers/scorers_factory.py +49 -0
- judgeval/v1/tracer/__init__.py +7 -0
- judgeval/v1/tracer/base_tracer.py +520 -0
- judgeval/v1/tracer/exporters/__init__.py +14 -0
- judgeval/v1/tracer/exporters/in_memory_span_exporter.py +25 -0
- judgeval/v1/tracer/exporters/judgment_span_exporter.py +42 -0
- judgeval/v1/tracer/exporters/noop_span_exporter.py +19 -0
- judgeval/v1/tracer/exporters/span_store.py +50 -0
- judgeval/v1/tracer/judgment_tracer_provider.py +70 -0
- judgeval/v1/tracer/processors/__init__.py +6 -0
- judgeval/v1/tracer/processors/_lifecycles/__init__.py +28 -0
- judgeval/v1/tracer/processors/_lifecycles/agent_id_processor.py +53 -0
- judgeval/v1/tracer/processors/_lifecycles/context_keys.py +11 -0
- judgeval/v1/tracer/processors/_lifecycles/customer_id_processor.py +29 -0
- judgeval/v1/tracer/processors/_lifecycles/registry.py +18 -0
- judgeval/v1/tracer/processors/judgment_span_processor.py +165 -0
- judgeval/v1/tracer/processors/noop_span_processor.py +42 -0
- judgeval/v1/tracer/tracer.py +67 -0
- judgeval/v1/tracer/tracer_factory.py +38 -0
- judgeval/v1/trainers/__init__.py +5 -0
- judgeval/v1/trainers/base_trainer.py +62 -0
- judgeval/v1/trainers/config.py +123 -0
- judgeval/v1/trainers/console.py +144 -0
- judgeval/v1/trainers/fireworks_trainer.py +392 -0
- judgeval/v1/trainers/trainable_model.py +252 -0
- judgeval/v1/trainers/trainers_factory.py +37 -0
- judgeval/v1/utils.py +18 -0
- judgeval/version.py +5 -0
- judgeval/warnings.py +4 -0
- judgeval-0.23.0.dist-info/METADATA +266 -0
- judgeval-0.23.0.dist-info/RECORD +201 -0
- judgeval-0.23.0.dist-info/entry_points.txt +2 -0
- judgeval/clients.py +0 -34
- judgeval/common/__init__.py +0 -13
- judgeval/common/api/__init__.py +0 -3
- judgeval/common/api/api.py +0 -352
- judgeval/common/api/constants.py +0 -165
- judgeval/common/exceptions.py +0 -27
- judgeval/common/storage/__init__.py +0 -6
- judgeval/common/storage/s3_storage.py +0 -98
- judgeval/common/tracer/__init__.py +0 -31
- judgeval/common/tracer/constants.py +0 -22
- judgeval/common/tracer/core.py +0 -1916
- judgeval/common/tracer/otel_exporter.py +0 -108
- judgeval/common/tracer/otel_span_processor.py +0 -234
- judgeval/common/tracer/span_processor.py +0 -37
- judgeval/common/tracer/span_transformer.py +0 -211
- judgeval/common/tracer/trace_manager.py +0 -92
- judgeval/common/utils.py +0 -940
- judgeval/data/datasets/__init__.py +0 -4
- judgeval/data/datasets/dataset.py +0 -341
- judgeval/data/datasets/eval_dataset_client.py +0 -214
- judgeval/data/tool.py +0 -5
- judgeval/data/trace_run.py +0 -37
- judgeval/evaluation_run.py +0 -75
- judgeval/integrations/langgraph.py +0 -843
- judgeval/judges/mixture_of_judges.py +0 -286
- judgeval/judgment_client.py +0 -369
- judgeval/rules.py +0 -521
- judgeval/run_evaluation.py +0 -684
- judgeval/scorers/judgeval_scorers/api_scorers/derailment_scorer.py +0 -14
- judgeval/scorers/judgeval_scorers/api_scorers/execution_order.py +0 -52
- judgeval/scorers/judgeval_scorers/api_scorers/hallucination.py +0 -28
- judgeval/scorers/judgeval_scorers/api_scorers/tool_dependency.py +0 -20
- judgeval/scorers/judgeval_scorers/api_scorers/tool_order.py +0 -27
- judgeval/utils/alerts.py +0 -93
- judgeval/utils/requests.py +0 -50
- judgeval-0.1.0.dist-info/METADATA +0 -202
- judgeval-0.1.0.dist-info/RECORD +0 -73
- {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/WHEEL +0 -0
- {judgeval-0.1.0.dist-info → judgeval-0.23.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
from typing import Dict, Any, Mapping, Literal, Optional
|
|
2
|
+
import httpx
|
|
3
|
+
from httpx import Response
|
|
4
|
+
from judgeval.exceptions import JudgmentAPIError
|
|
5
|
+
from judgeval.utils.url import url_for
|
|
6
|
+
from judgeval.utils.serialize import json_encoder
|
|
7
|
+
from judgeval.v1.internal.api.api_types import *
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _headers(api_key: str, organization_id: str) -> Mapping[str, str]:
|
|
11
|
+
return {
|
|
12
|
+
"Content-Type": "application/json",
|
|
13
|
+
"Authorization": f"Bearer {api_key}",
|
|
14
|
+
"X-Organization-Id": organization_id,
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _handle_response(r: Response) -> Any:
    """Decode an API response, raising ``JudgmentAPIError`` for HTTP >= 400.

    On error responses the detail message is taken from the JSON body's
    ``"detail"`` field when the body parses as JSON; otherwise the raw
    response text is used.
    """
    if r.status_code < 400:
        return r.json()
    try:
        detail = r.json().get("detail", "")
    except Exception:
        detail = r.text
    raise JudgmentAPIError(r.status_code, detail, r)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class JudgmentSyncClient:
    """Synchronous client for the Judgment REST API.

    Thin wrapper around an ``httpx.Client`` that attaches the API-key /
    organization headers and JSON-encodes request payloads for each endpoint.

    The underlying connection pool is held open for the life of the
    instance; call :meth:`close` when done, or use the instance as a
    context manager (``with JudgmentSyncClient(...) as c: ...``) so the
    pool is released deterministically.
    """

    __slots__ = ("base_url", "api_key", "organization_id", "client")

    def __init__(self, base_url: str, api_key: str, organization_id: str):
        self.base_url = base_url
        self.api_key = api_key
        self.organization_id = organization_id
        self.client = httpx.Client(timeout=30)

    def close(self) -> None:
        """Close the underlying ``httpx.Client`` and its connection pool."""
        self.client.close()

    def __enter__(self) -> "JudgmentSyncClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        # Always release the connection pool, even when the body raised.
        self.close()

    def _request(
        self,
        method: Literal["POST", "PATCH", "GET", "DELETE"],
        url: str,
        payload: Any,
        params: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """Issue one HTTP request and return the decoded JSON body.

        For GET requests ``payload`` is sent as query parameters (unless
        ``params`` is given explicitly); for every other method it is sent
        as the JSON body. Raises ``JudgmentAPIError`` on HTTP >= 400 via
        ``_handle_response``.
        """
        if method == "GET":
            r = self.client.request(
                method,
                url,
                params=payload if params is None else params,
                headers=_headers(self.api_key, self.organization_id),
            )
        else:
            r = self.client.request(
                method,
                url,
                json=json_encoder(payload),
                params=params,
                headers=_headers(self.api_key, self.organization_id),
            )
        return _handle_response(r)

    def add_to_run_eval_queue_examples(self, payload: ExampleEvaluationRun) -> Any:
        """POST /add_to_run_eval_queue/examples."""
        return self._request(
            "POST",
            url_for("/add_to_run_eval_queue/examples", self.base_url),
            payload,
        )

    def add_to_run_eval_queue_traces(self, payload: TraceEvaluationRun) -> Any:
        """POST /add_to_run_eval_queue/traces."""
        return self._request(
            "POST",
            url_for("/add_to_run_eval_queue/traces", self.base_url),
            payload,
        )

    def evaluate_examples(
        self, payload: ExampleEvaluationRun, stream: Optional[str] = None
    ) -> EvaluateResponse:
        """POST /evaluate/examples, optionally with a ``stream`` query flag."""
        query_params = {}
        if stream is not None:
            query_params["stream"] = stream
        return self._request(
            "POST",
            url_for("/evaluate/examples", self.base_url),
            payload,
            params=query_params,
        )

    def evaluate_traces(
        self, payload: TraceEvaluationRun, stream: Optional[str] = None
    ) -> EvaluateResponse:
        """POST /evaluate/traces, optionally with a ``stream`` query flag."""
        query_params = {}
        if stream is not None:
            query_params["stream"] = stream
        return self._request(
            "POST",
            url_for("/evaluate/traces", self.base_url),
            payload,
            params=query_params,
        )

    def log_eval_results(self, payload: EvalResults) -> LogEvalResultsResponse:
        """POST /log_eval_results/."""
        return self._request(
            "POST",
            url_for("/log_eval_results/", self.base_url),
            payload,
        )

    def fetch_experiment_run(
        self, payload: EvalResultsFetch
    ) -> FetchExperimentRunResponse:
        """POST /fetch_experiment_run/."""
        return self._request(
            "POST",
            url_for("/fetch_experiment_run/", self.base_url),
            payload,
        )

    def datasets_insert_examples_for_judgeval(
        self, payload: DatasetInsertExamples
    ) -> Any:
        """POST /datasets/insert_examples_for_judgeval/."""
        return self._request(
            "POST",
            url_for("/datasets/insert_examples_for_judgeval/", self.base_url),
            payload,
        )

    def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
        """POST /datasets/pull_for_judgeval/."""
        return self._request(
            "POST",
            url_for("/datasets/pull_for_judgeval/", self.base_url),
            payload,
        )

    def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
        """POST /datasets/pull_all_for_judgeval/."""
        return self._request(
            "POST",
            url_for("/datasets/pull_all_for_judgeval/", self.base_url),
            payload,
        )

    def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
        """POST /datasets/create_for_judgeval/."""
        return self._request(
            "POST",
            url_for("/datasets/create_for_judgeval/", self.base_url),
            payload,
        )

    def projects_add(self, payload: ProjectAdd) -> ProjectAddResponse:
        """POST /projects/add/."""
        return self._request(
            "POST",
            url_for("/projects/add/", self.base_url),
            payload,
        )

    def projects_delete_from_judgeval(
        self, payload: ProjectDeleteFromJudgevalResponse
    ) -> ProjectDeleteResponse:
        """DELETE /projects/delete_from_judgeval/ (payload sent as JSON body)."""
        return self._request(
            "DELETE",
            url_for("/projects/delete_from_judgeval/", self.base_url),
            payload,
        )

    def scorer_exists(self, payload: ScorerExistsRequest) -> ScorerExistsResponse:
        """POST /scorer_exists/."""
        return self._request(
            "POST",
            url_for("/scorer_exists/", self.base_url),
            payload,
        )

    def save_scorer(self, payload: SavePromptScorerRequest) -> SavePromptScorerResponse:
        """POST /save_scorer/."""
        return self._request(
            "POST",
            url_for("/save_scorer/", self.base_url),
            payload,
        )

    def fetch_scorers(
        self, payload: FetchPromptScorersRequest
    ) -> FetchPromptScorersResponse:
        """POST /fetch_scorers/."""
        return self._request(
            "POST",
            url_for("/fetch_scorers/", self.base_url),
            payload,
        )

    def upload_custom_scorer(
        self, payload: CustomScorerUploadPayload
    ) -> CustomScorerTemplateResponse:
        """POST /upload_custom_scorer/."""
        return self._request(
            "POST",
            url_for("/upload_custom_scorer/", self.base_url),
            payload,
        )

    def prompts_insert(self, payload: PromptInsertRequest) -> PromptInsertResponse:
        """POST /prompts/insert/."""
        return self._request(
            "POST",
            url_for("/prompts/insert/", self.base_url),
            payload,
        )

    def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
        """POST /prompts/tag/."""
        return self._request(
            "POST",
            url_for("/prompts/tag/", self.base_url),
            payload,
        )

    def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
        """POST /prompts/untag/."""
        return self._request(
            "POST",
            url_for("/prompts/untag/", self.base_url),
            payload,
        )

    def prompts_fetch(
        self,
        project_id: str,
        name: str,
        commit_id: Optional[str] = None,
        tag: Optional[str] = None,
    ) -> PromptFetchResponse:
        """GET /prompts/fetch/ with required and optional query parameters."""
        query_params = {}
        query_params["project_id"] = project_id
        query_params["name"] = name
        if commit_id is not None:
            query_params["commit_id"] = commit_id
        if tag is not None:
            query_params["tag"] = tag
        return self._request(
            "GET",
            url_for("/prompts/fetch/", self.base_url),
            query_params,
        )

    def prompts_get_prompt_versions(
        self, project_id: str, name: str
    ) -> PromptVersionsResponse:
        """GET /prompts/get_prompt_versions/."""
        query_params = {}
        query_params["project_id"] = project_id
        query_params["name"] = name
        return self._request(
            "GET",
            url_for("/prompts/get_prompt_versions/", self.base_url),
            query_params,
        )

    def projects_resolve(
        self, payload: ResolveProjectNameRequest
    ) -> ResolveProjectNameResponse:
        """POST /projects/resolve/."""
        return self._request(
            "POST",
            url_for("/projects/resolve/", self.base_url),
            payload,
        )

    def e2e_fetch_trace(self, payload: TraceIdRequest) -> Any:
        """POST /e2e_fetch_trace/."""
        return self._request(
            "POST",
            url_for("/e2e_fetch_trace/", self.base_url),
            payload,
        )

    def e2e_fetch_span_score(self, payload: SpanScoreRequest) -> Any:
        """POST /e2e_fetch_span_score/."""
        return self._request(
            "POST",
            url_for("/e2e_fetch_span_score/", self.base_url),
            payload,
        )
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
class JudgmentAsyncClient:
    """Asynchronous client for the Judgment REST API.

    Mirror of ``JudgmentSyncClient`` built on ``httpx.AsyncClient``. The
    underlying connection pool is held open for the life of the instance;
    await :meth:`aclose` when done, or use the instance as an async context
    manager (``async with JudgmentAsyncClient(...) as c: ...``) so the pool
    is released deterministically.
    """

    __slots__ = ("base_url", "api_key", "organization_id", "client")

    def __init__(self, base_url: str, api_key: str, organization_id: str):
        self.base_url = base_url
        self.api_key = api_key
        self.organization_id = organization_id
        self.client = httpx.AsyncClient(timeout=30)

    async def aclose(self) -> None:
        """Close the underlying ``httpx.AsyncClient`` and its connection pool."""
        await self.client.aclose()

    async def __aenter__(self) -> "JudgmentAsyncClient":
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        # Always release the connection pool, even when the body raised.
        await self.aclose()

    async def _request(
        self,
        method: Literal["POST", "PATCH", "GET", "DELETE"],
        url: str,
        payload: Any,
        params: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """Issue one HTTP request and return the decoded JSON body.

        For GET requests ``payload`` is sent as query parameters (unless
        ``params`` is given explicitly); for every other method it is sent
        as the JSON body. Raises ``JudgmentAPIError`` on HTTP >= 400 via
        ``_handle_response``.
        """
        # Await the request where it is issued (instead of deferring the
        # await) so no coroutine is left pending if an exception fires.
        if method == "GET":
            r = await self.client.request(
                method,
                url,
                params=payload if params is None else params,
                headers=_headers(self.api_key, self.organization_id),
            )
        else:
            r = await self.client.request(
                method,
                url,
                json=json_encoder(payload),
                params=params,
                headers=_headers(self.api_key, self.organization_id),
            )
        return _handle_response(r)

    async def add_to_run_eval_queue_examples(
        self, payload: ExampleEvaluationRun
    ) -> Any:
        """POST /add_to_run_eval_queue/examples."""
        return await self._request(
            "POST",
            url_for("/add_to_run_eval_queue/examples", self.base_url),
            payload,
        )

    async def add_to_run_eval_queue_traces(self, payload: TraceEvaluationRun) -> Any:
        """POST /add_to_run_eval_queue/traces."""
        return await self._request(
            "POST",
            url_for("/add_to_run_eval_queue/traces", self.base_url),
            payload,
        )

    async def evaluate_examples(
        self, payload: ExampleEvaluationRun, stream: Optional[str] = None
    ) -> EvaluateResponse:
        """POST /evaluate/examples, optionally with a ``stream`` query flag."""
        query_params = {}
        if stream is not None:
            query_params["stream"] = stream
        return await self._request(
            "POST",
            url_for("/evaluate/examples", self.base_url),
            payload,
            params=query_params,
        )

    async def evaluate_traces(
        self, payload: TraceEvaluationRun, stream: Optional[str] = None
    ) -> EvaluateResponse:
        """POST /evaluate/traces, optionally with a ``stream`` query flag."""
        query_params = {}
        if stream is not None:
            query_params["stream"] = stream
        return await self._request(
            "POST",
            url_for("/evaluate/traces", self.base_url),
            payload,
            params=query_params,
        )

    async def log_eval_results(self, payload: EvalResults) -> LogEvalResultsResponse:
        """POST /log_eval_results/."""
        return await self._request(
            "POST",
            url_for("/log_eval_results/", self.base_url),
            payload,
        )

    async def fetch_experiment_run(
        self, payload: EvalResultsFetch
    ) -> FetchExperimentRunResponse:
        """POST /fetch_experiment_run/."""
        return await self._request(
            "POST",
            url_for("/fetch_experiment_run/", self.base_url),
            payload,
        )

    async def datasets_insert_examples_for_judgeval(
        self, payload: DatasetInsertExamples
    ) -> Any:
        """POST /datasets/insert_examples_for_judgeval/."""
        return await self._request(
            "POST",
            url_for("/datasets/insert_examples_for_judgeval/", self.base_url),
            payload,
        )

    async def datasets_pull_for_judgeval(self, payload: DatasetFetch) -> DatasetReturn:
        """POST /datasets/pull_for_judgeval/."""
        return await self._request(
            "POST",
            url_for("/datasets/pull_for_judgeval/", self.base_url),
            payload,
        )

    async def datasets_pull_all_for_judgeval(self, payload: DatasetsFetch) -> Any:
        """POST /datasets/pull_all_for_judgeval/."""
        return await self._request(
            "POST",
            url_for("/datasets/pull_all_for_judgeval/", self.base_url),
            payload,
        )

    async def datasets_create_for_judgeval(self, payload: DatasetCreate) -> Any:
        """POST /datasets/create_for_judgeval/."""
        return await self._request(
            "POST",
            url_for("/datasets/create_for_judgeval/", self.base_url),
            payload,
        )

    async def projects_add(self, payload: ProjectAdd) -> ProjectAddResponse:
        """POST /projects/add/."""
        return await self._request(
            "POST",
            url_for("/projects/add/", self.base_url),
            payload,
        )

    async def projects_delete_from_judgeval(
        self, payload: ProjectDeleteFromJudgevalResponse
    ) -> ProjectDeleteResponse:
        """DELETE /projects/delete_from_judgeval/ (payload sent as JSON body)."""
        return await self._request(
            "DELETE",
            url_for("/projects/delete_from_judgeval/", self.base_url),
            payload,
        )

    async def scorer_exists(self, payload: ScorerExistsRequest) -> ScorerExistsResponse:
        """POST /scorer_exists/."""
        return await self._request(
            "POST",
            url_for("/scorer_exists/", self.base_url),
            payload,
        )

    async def save_scorer(
        self, payload: SavePromptScorerRequest
    ) -> SavePromptScorerResponse:
        """POST /save_scorer/."""
        return await self._request(
            "POST",
            url_for("/save_scorer/", self.base_url),
            payload,
        )

    async def fetch_scorers(
        self, payload: FetchPromptScorersRequest
    ) -> FetchPromptScorersResponse:
        """POST /fetch_scorers/."""
        return await self._request(
            "POST",
            url_for("/fetch_scorers/", self.base_url),
            payload,
        )

    async def upload_custom_scorer(
        self, payload: CustomScorerUploadPayload
    ) -> CustomScorerTemplateResponse:
        """POST /upload_custom_scorer/."""
        return await self._request(
            "POST",
            url_for("/upload_custom_scorer/", self.base_url),
            payload,
        )

    async def prompts_insert(
        self, payload: PromptInsertRequest
    ) -> PromptInsertResponse:
        """POST /prompts/insert/."""
        return await self._request(
            "POST",
            url_for("/prompts/insert/", self.base_url),
            payload,
        )

    async def prompts_tag(self, payload: PromptTagRequest) -> PromptTagResponse:
        """POST /prompts/tag/."""
        return await self._request(
            "POST",
            url_for("/prompts/tag/", self.base_url),
            payload,
        )

    async def prompts_untag(self, payload: PromptUntagRequest) -> PromptUntagResponse:
        """POST /prompts/untag/."""
        return await self._request(
            "POST",
            url_for("/prompts/untag/", self.base_url),
            payload,
        )

    async def prompts_fetch(
        self,
        project_id: str,
        name: str,
        commit_id: Optional[str] = None,
        tag: Optional[str] = None,
    ) -> PromptFetchResponse:
        """GET /prompts/fetch/ with required and optional query parameters."""
        query_params = {}
        query_params["project_id"] = project_id
        query_params["name"] = name
        if commit_id is not None:
            query_params["commit_id"] = commit_id
        if tag is not None:
            query_params["tag"] = tag
        return await self._request(
            "GET",
            url_for("/prompts/fetch/", self.base_url),
            query_params,
        )

    async def prompts_get_prompt_versions(
        self, project_id: str, name: str
    ) -> PromptVersionsResponse:
        """GET /prompts/get_prompt_versions/."""
        query_params = {}
        query_params["project_id"] = project_id
        query_params["name"] = name
        return await self._request(
            "GET",
            url_for("/prompts/get_prompt_versions/", self.base_url),
            query_params,
        )

    async def projects_resolve(
        self, payload: ResolveProjectNameRequest
    ) -> ResolveProjectNameResponse:
        """POST /projects/resolve/."""
        return await self._request(
            "POST",
            url_for("/projects/resolve/", self.base_url),
            payload,
        )

    async def e2e_fetch_trace(self, payload: TraceIdRequest) -> Any:
        """POST /e2e_fetch_trace/."""
        return await self._request(
            "POST",
            url_for("/e2e_fetch_trace/", self.base_url),
            payload,
        )

    async def e2e_fetch_span_score(self, payload: SpanScoreRequest) -> Any:
        """POST /e2e_fetch_span_score/."""
        return await self._request(
            "POST",
            url_for("/e2e_fetch_span_score/", self.base_url),
            payload,
        )
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
# Public API of this module: the two generated HTTP clients.
__all__ = [
    "JudgmentSyncClient",
    "JudgmentAsyncClient",
]
|