evalgate-sdk 3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalgate_sdk/__init__.py +707 -0
- evalgate_sdk/_version.py +3 -0
- evalgate_sdk/assertions.py +1362 -0
- evalgate_sdk/auto.py +247 -0
- evalgate_sdk/batch.py +174 -0
- evalgate_sdk/cache.py +111 -0
- evalgate_sdk/ci_context.py +123 -0
- evalgate_sdk/cli/__init__.py +111 -0
- evalgate_sdk/cli/api.py +261 -0
- evalgate_sdk/cli/cli_constants.py +20 -0
- evalgate_sdk/cli/commands.py +1041 -0
- evalgate_sdk/cli/config.py +228 -0
- evalgate_sdk/cli/env.py +43 -0
- evalgate_sdk/cli/formatters/types.py +132 -0
- evalgate_sdk/cli/golden_commands.py +322 -0
- evalgate_sdk/cli/manifest.py +301 -0
- evalgate_sdk/cli/new_commands.py +435 -0
- evalgate_sdk/cli/policy_packs.py +103 -0
- evalgate_sdk/cli/profiles.py +12 -0
- evalgate_sdk/cli/regression_gate.py +312 -0
- evalgate_sdk/cli/render/__init__.py +1 -0
- evalgate_sdk/cli/render/snippet.py +18 -0
- evalgate_sdk/cli/render/sort.py +29 -0
- evalgate_sdk/cli/report/__init__.py +1 -0
- evalgate_sdk/cli/report/build_check_report.py +209 -0
- evalgate_sdk/cli/traces.py +186 -0
- evalgate_sdk/cli/workspace.py +63 -0
- evalgate_sdk/client.py +609 -0
- evalgate_sdk/cluster.py +359 -0
- evalgate_sdk/collector.py +161 -0
- evalgate_sdk/constants.py +6 -0
- evalgate_sdk/context.py +151 -0
- evalgate_sdk/errors.py +236 -0
- evalgate_sdk/export.py +238 -0
- evalgate_sdk/formatters/__init__.py +11 -0
- evalgate_sdk/formatters/github.py +51 -0
- evalgate_sdk/formatters/human.py +68 -0
- evalgate_sdk/formatters/json_fmt.py +11 -0
- evalgate_sdk/formatters/pr_comment.py +80 -0
- evalgate_sdk/golden.py +426 -0
- evalgate_sdk/integrations/__init__.py +1 -0
- evalgate_sdk/integrations/anthropic.py +99 -0
- evalgate_sdk/integrations/autogen.py +62 -0
- evalgate_sdk/integrations/crewai.py +61 -0
- evalgate_sdk/integrations/langchain.py +100 -0
- evalgate_sdk/integrations/openai.py +155 -0
- evalgate_sdk/integrations/openai_eval.py +221 -0
- evalgate_sdk/local.py +144 -0
- evalgate_sdk/logger.py +123 -0
- evalgate_sdk/matchers.py +62 -0
- evalgate_sdk/otel.py +256 -0
- evalgate_sdk/pagination.py +145 -0
- evalgate_sdk/py.typed +0 -0
- evalgate_sdk/pytest_plugin.py +96 -0
- evalgate_sdk/reason_codes.py +103 -0
- evalgate_sdk/regression.py +196 -0
- evalgate_sdk/replay_decision.py +115 -0
- evalgate_sdk/runtime/__init__.py +50 -0
- evalgate_sdk/runtime/adapters/__init__.py +1 -0
- evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
- evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
- evalgate_sdk/runtime/context.py +68 -0
- evalgate_sdk/runtime/eval.py +318 -0
- evalgate_sdk/runtime/execution_mode.py +170 -0
- evalgate_sdk/runtime/executor.py +92 -0
- evalgate_sdk/runtime/registry.py +125 -0
- evalgate_sdk/runtime/run_report.py +249 -0
- evalgate_sdk/runtime/types.py +143 -0
- evalgate_sdk/snapshot.py +219 -0
- evalgate_sdk/streaming.py +124 -0
- evalgate_sdk/synthesize.py +226 -0
- evalgate_sdk/testing.py +128 -0
- evalgate_sdk/types.py +666 -0
- evalgate_sdk/utils/__init__.py +1 -0
- evalgate_sdk/utils/input_hash.py +42 -0
- evalgate_sdk/workflows.py +264 -0
- evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
- evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
- evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
- evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
evalgate_sdk/client.py
ADDED
|
@@ -0,0 +1,609 @@
|
|
|
1
|
+
"""AIEvalClient — async HTTP client for the EvalGate API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import builtins
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
from typing import Any, TypeVar
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from evalgate_sdk._version import SDK_VERSION, SPEC_VERSION
|
|
13
|
+
from evalgate_sdk.errors import (
|
|
14
|
+
EvalGateError,
|
|
15
|
+
NetworkError,
|
|
16
|
+
create_error_from_response,
|
|
17
|
+
)
|
|
18
|
+
from evalgate_sdk.types import (
|
|
19
|
+
Annotation,
|
|
20
|
+
AnnotationItem,
|
|
21
|
+
AnnotationTask,
|
|
22
|
+
APIKey,
|
|
23
|
+
APIKeyUsage,
|
|
24
|
+
APIKeyWithSecret,
|
|
25
|
+
ClientConfig,
|
|
26
|
+
CreateAnnotationItemParams,
|
|
27
|
+
CreateAnnotationParams,
|
|
28
|
+
CreateAnnotationTaskParams,
|
|
29
|
+
CreateAPIKeyParams,
|
|
30
|
+
CreateEvaluationParams,
|
|
31
|
+
CreateLLMJudgeConfigParams,
|
|
32
|
+
CreateRunParams,
|
|
33
|
+
CreateSpanParams,
|
|
34
|
+
CreateTestCaseParams,
|
|
35
|
+
CreateTraceParams,
|
|
36
|
+
CreateWebhookParams,
|
|
37
|
+
Evaluation,
|
|
38
|
+
EvaluationRun,
|
|
39
|
+
GetLLMJudgeAlignmentParams,
|
|
40
|
+
GetUsageParams,
|
|
41
|
+
ListAnnotationItemsParams,
|
|
42
|
+
ListAnnotationsParams,
|
|
43
|
+
ListAnnotationTasksParams,
|
|
44
|
+
ListAPIKeysParams,
|
|
45
|
+
ListEvaluationsParams,
|
|
46
|
+
ListLLMJudgeConfigsParams,
|
|
47
|
+
ListLLMJudgeResultsParams,
|
|
48
|
+
ListTracesParams,
|
|
49
|
+
ListWebhookDeliveriesParams,
|
|
50
|
+
ListWebhooksParams,
|
|
51
|
+
LLMJudgeAlignment,
|
|
52
|
+
LLMJudgeConfig,
|
|
53
|
+
LLMJudgeResult,
|
|
54
|
+
Organization,
|
|
55
|
+
QualityScore,
|
|
56
|
+
RunLLMJudgeParams,
|
|
57
|
+
Span,
|
|
58
|
+
TestCase,
|
|
59
|
+
Trace,
|
|
60
|
+
UpdateAPIKeyParams,
|
|
61
|
+
UpdateEvaluationParams,
|
|
62
|
+
UpdateTraceParams,
|
|
63
|
+
UpdateWebhookParams,
|
|
64
|
+
UsageStats,
|
|
65
|
+
UsageSummary,
|
|
66
|
+
Webhook,
|
|
67
|
+
WebhookDelivery,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# Shared SDK logger; registered under the fixed name "evalgate_sdk" (not __name__).
logger = logging.getLogger("evalgate_sdk")
|
|
71
|
+
|
|
72
|
+
# Generic type variable. NOTE(review): not referenced by any definition visible in this module — confirm it is still needed.
T = TypeVar("T")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Legacy identifiers (old env-var names, the ".evalai" config dir) that have already
# triggered a deprecation warning, so each one is reported at most once per process.
_LEGACY_WARNED: set[str] = set()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _env(name: str, legacy: str | None = None) -> str | None:
    """Read an environment variable, falling back to a deprecated alias.

    Args:
        name: Preferred variable name. A non-empty value wins immediately.
        legacy: Optional deprecated variable name consulted when ``name`` is
            unset or empty.

    Returns:
        The non-empty value of ``name``; otherwise whatever ``legacy`` holds
        (which may be ``None`` or an empty string); ``None`` when no legacy
        name was supplied.

    A ``DeprecationWarning`` is emitted the first time a given legacy name is
    found with a non-empty value; later hits on the same name stay silent.
    """
    primary = os.environ.get(name)
    if primary:
        return primary
    if not legacy:
        return None

    fallback = os.environ.get(legacy)
    if fallback and legacy not in _LEGACY_WARNED:
        import warnings

        warnings.warn(
            f"[EvalGate] Deprecation: {legacy} is deprecated. Use {name} instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Remember the name so the warning fires only once per process.
        _LEGACY_WARNED.add(legacy)
    return fallback
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _load_config_file() -> dict[str, Any]:
    """Load saved config from .evalgate/config.json or .evalai/config.json if it exists.

    The search starts in the current working directory and walks up through
    every parent directory. In each directory ``.evalgate`` is tried before the
    deprecated ``.evalai``. The first file that can be read and parsed ends the
    search.

    Returns:
        The parsed JSON object, or ``{}`` when no usable file is found or the
        first parseable file does not contain a JSON object.

    A file that exists but cannot be read or parsed is skipped (and noted at
    DEBUG level) so a broken config never prevents client construction. Using
    the legacy ``.evalai`` directory emits a ``DeprecationWarning`` once per
    process.
    """
    import json
    import logging
    import warnings
    from pathlib import Path

    cwd = Path.cwd()
    for parent in (cwd, *cwd.parents):
        for cfg_dir in (".evalgate", ".evalai"):
            config_path = parent / cfg_dir / "config.json"
            if not config_path.exists():
                continue

            # Only the read/parse can legitimately fail; keep the try narrow so
            # unrelated errors are not silently discarded.
            try:
                # JSON is UTF-8 by specification; do not depend on the locale encoding.
                data = json.loads(config_path.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                logging.getLogger("evalgate_sdk").debug(
                    "Ignoring unreadable config file %s: %s", config_path, exc
                )
                continue

            # Issued outside the try block so that a warning escalated to an
            # error (e.g. ``-W error``) surfaces instead of being swallowed.
            if cfg_dir == ".evalai" and ".evalai" not in _LEGACY_WARNED:
                warnings.warn(
                    "[EvalGate] Deprecation: .evalai/ config is deprecated. "
                    "Migrate to .evalgate/ (e.g. mv .evalai .evalgate).",
                    DeprecationWarning,
                    stacklevel=2,
                )
                _LEGACY_WARNED.add(".evalai")
            return data if isinstance(data, dict) else {}
    return {}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class _BaseAPI:
    """Shared HTTP helpers for every API sub-module.

    Each helper forwards to the owning client's ``_request`` coroutine with the
    matching HTTP verb and returns whatever that call returns.
    """

    def __init__(self, client: AIEvalClient) -> None:
        """Remember the client whose ``_request`` performs the actual I/O."""
        self._c = client

    async def _get(self, path: str, params: dict[str, Any] | None = None) -> Any:
        """Issue a GET to ``path`` with optional query parameters."""
        owner = self._c
        return await owner._request("GET", path, params=params)

    async def _post(self, path: str, json: dict[str, Any] | None = None) -> Any:
        """Issue a POST to ``path`` with an optional JSON body."""
        owner = self._c
        return await owner._request("POST", path, json=json)

    async def _patch(self, path: str, json: dict[str, Any] | None = None) -> Any:
        """Issue a PATCH to ``path`` with an optional JSON body."""
        owner = self._c
        return await owner._request("PATCH", path, json=json)

    async def _put(self, path: str, json: dict[str, Any] | None = None) -> Any:
        """Issue a PUT to ``path`` with an optional JSON body."""
        owner = self._c
        return await owner._request("PUT", path, json=json)

    async def _delete(self, path: str) -> Any:
        """Issue a DELETE to ``path``."""
        owner = self._c
        return await owner._request("DELETE", path)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ── API sub-modules ──────────────────────────────────────────────────
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class TraceAPI(_BaseAPI):
    """Trace and span endpoints rooted at ``/api/traces``.

    Request models are serialised with ``by_alias=True, exclude_none=True``;
    responses are validated into the corresponding result models.
    """

    async def create(self, params: CreateTraceParams) -> Trace:
        """POST a new trace and return the validated ``Trace``."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/traces", json=body)
        return Trace.model_validate(response)

    async def list(self, params: ListTracesParams | None = None) -> builtins.list[Trace]:
        """GET traces, using default ``ListTracesParams`` when none are given.

        Accepts either a bare JSON array or an object carrying the rows under
        ``"data"`` (preferred) or ``"traces"``.
        """
        filters = params or ListTracesParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/traces", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", response.get("traces", []))
        return [Trace.model_validate(row) for row in rows]

    async def get(self, trace_id: int) -> Trace:
        """GET a single trace by id."""
        response = await self._get(f"/api/traces/{trace_id}")
        return Trace.model_validate(response)

    async def update(self, trace_id: int, params: UpdateTraceParams) -> Trace:
        """PATCH a trace with the non-``None`` fields of ``params``."""
        changes = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._patch(f"/api/traces/{trace_id}", json=changes)
        return Trace.model_validate(response)

    async def delete(self, trace_id: int) -> dict[str, str]:
        """DELETE a trace and return the raw response payload."""
        outcome = await self._delete(f"/api/traces/{trace_id}")
        return outcome

    async def create_span(self, trace_id: int, params: CreateSpanParams) -> Span:
        """POST a new span under the given trace."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post(f"/api/traces/{trace_id}/spans", json=body)
        return Span.model_validate(response)

    async def list_spans(self, trace_id: int) -> builtins.list[Span]:
        """GET the spans of a trace (array, or object with ``"data"``/``"spans"``)."""
        response = await self._get(f"/api/traces/{trace_id}/spans")
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", response.get("spans", []))
        return [Span.model_validate(row) for row in rows]
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class EvaluationAPI(_BaseAPI):
    """Evaluation, test-case and run endpoints rooted at ``/api/evaluations``."""

    async def create(self, params: CreateEvaluationParams) -> Evaluation:
        """POST a new evaluation and return the validated ``Evaluation``."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/evaluations", json=body)
        return Evaluation.model_validate(response)

    async def get(self, evaluation_id: int) -> Evaluation:
        """GET one evaluation; unwraps an ``"evaluation"`` envelope when present."""
        response = await self._get(f"/api/evaluations/{evaluation_id}")
        if isinstance(response, dict):
            response = response.get("evaluation", response)
        return Evaluation.model_validate(response)

    async def list(self, params: ListEvaluationsParams | None = None) -> builtins.list[Evaluation]:
        """GET evaluations (array, or object with ``"data"``/``"evaluations"``)."""
        filters = params or ListEvaluationsParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/evaluations", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", response.get("evaluations", []))
        return [Evaluation.model_validate(row) for row in rows]

    async def update(self, evaluation_id: int, params: UpdateEvaluationParams) -> Evaluation:
        """PATCH an evaluation; unwraps an ``"evaluation"`` envelope when present."""
        changes = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._patch(f"/api/evaluations/{evaluation_id}", json=changes)
        if isinstance(response, dict):
            response = response.get("evaluation", response)
        return Evaluation.model_validate(response)

    async def delete(self, evaluation_id: int) -> dict[str, str]:
        """DELETE an evaluation and return the raw response payload."""
        outcome = await self._delete(f"/api/evaluations/{evaluation_id}")
        return outcome

    async def create_test_case(self, evaluation_id: int, params: CreateTestCaseParams) -> TestCase:
        """POST a new test case under the given evaluation."""
        endpoint = f"/api/evaluations/{evaluation_id}/test-cases"
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post(endpoint, json=body)
        return TestCase.model_validate(response)

    async def list_test_cases(self, evaluation_id: int) -> builtins.list[TestCase]:
        """GET an evaluation's test cases (array, or object with ``"data"``/``"testCases"``)."""
        response = await self._get(f"/api/evaluations/{evaluation_id}/test-cases")
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", response.get("testCases", []))
        return [TestCase.model_validate(row) for row in rows]

    async def create_run(self, evaluation_id: int, params: CreateRunParams | None = None) -> EvaluationRun:
        """POST a new run, using default ``CreateRunParams`` when none are given."""
        options = params or CreateRunParams()
        body = options.model_dump(by_alias=True, exclude_none=True)
        response = await self._post(f"/api/evaluations/{evaluation_id}/runs", json=body)
        return EvaluationRun.model_validate(response)

    async def list_runs(self, evaluation_id: int) -> builtins.list[EvaluationRun]:
        """GET an evaluation's runs (array, or object with ``"data"``/``"runs"``)."""
        response = await self._get(f"/api/evaluations/{evaluation_id}/runs")
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", response.get("runs", []))
        return [EvaluationRun.model_validate(row) for row in rows]

    async def get_run(self, evaluation_id: int, run_id: int) -> EvaluationRun:
        """GET a single run of an evaluation."""
        response = await self._get(f"/api/evaluations/{evaluation_id}/runs/{run_id}")
        return EvaluationRun.model_validate(response)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class LLMJudgeAPI(_BaseAPI):
    """LLM-judge endpoints rooted at ``/api/llm-judge``."""

    async def evaluate(self, params: RunLLMJudgeParams) -> dict[str, Any]:
        """POST an evaluation request and return the raw response payload."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        outcome = await self._post("/api/llm-judge/evaluate", json=body)
        return outcome

    async def create_config(self, params: CreateLLMJudgeConfigParams) -> LLMJudgeConfig:
        """POST a new judge configuration and return it validated."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/llm-judge/configs", json=body)
        return LLMJudgeConfig.model_validate(response)

    async def list_configs(self, params: ListLLMJudgeConfigsParams | None = None) -> list[LLMJudgeConfig]:
        """GET judge configurations (array, or object with rows under ``"data"``)."""
        filters = params or ListLLMJudgeConfigsParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/llm-judge/configs", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", [])
        return [LLMJudgeConfig.model_validate(row) for row in rows]

    async def list_results(self, params: ListLLMJudgeResultsParams | None = None) -> list[LLMJudgeResult]:
        """GET judge results (array, or object with rows under ``"data"``)."""
        filters = params or ListLLMJudgeResultsParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/llm-judge/results", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", [])
        return [LLMJudgeResult.model_validate(row) for row in rows]

    async def get_alignment(self, params: GetLLMJudgeAlignmentParams) -> LLMJudgeAlignment:
        """GET alignment data for the config identified by ``params.config_id``."""
        response = await self._get(f"/api/llm-judge/configs/{params.config_id}/alignment")
        return LLMJudgeAlignment.model_validate(response)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
class AnnotationsAPI(_BaseAPI):
    """Annotation endpoints at ``/api/annotations``; task endpoints live on ``.tasks``."""

    def __init__(self, client: AIEvalClient) -> None:
        """Bind to ``client`` and attach the annotation-task sub-API."""
        super().__init__(client)
        self.tasks = _AnnotationTasksAPI(client)

    async def create(self, params: CreateAnnotationParams) -> Annotation:
        """POST a new annotation; unwraps an ``"annotation"`` envelope when present."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/annotations", json=body)
        if isinstance(response, dict):
            response = response.get("annotation", response)
        return Annotation.model_validate(response)

    async def list(self, params: ListAnnotationsParams | None = None) -> builtins.list[Annotation]:
        """GET annotations.

        An object response is expected to carry the rows under
        ``"annotations"``; any non-object response is iterated as-is.
        """
        filters = params or ListAnnotationsParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/annotations", params=query)
        if isinstance(response, dict):
            rows = response.get("annotations", [])
        else:
            rows = response
        return [Annotation.model_validate(row) for row in rows]
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class _AnnotationTasksAPI(_BaseAPI):
    """Annotation-task endpoints at ``/api/annotation-tasks``; items live on ``.items``."""

    def __init__(self, client: AIEvalClient) -> None:
        """Bind to ``client`` and attach the annotation-item sub-API."""
        super().__init__(client)
        self.items = _AnnotationItemsAPI(client)

    async def create(self, params: CreateAnnotationTaskParams) -> AnnotationTask:
        """POST a new annotation task and return it validated."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/annotation-tasks", json=body)
        return AnnotationTask.model_validate(response)

    async def list(self, params: ListAnnotationTasksParams | None = None) -> builtins.list[AnnotationTask]:
        """GET annotation tasks (array, or object with rows under ``"data"``)."""
        filters = params or ListAnnotationTasksParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/annotation-tasks", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", [])
        return [AnnotationTask.model_validate(row) for row in rows]

    async def get(self, task_id: int) -> AnnotationTask:
        """GET a single annotation task by id."""
        response = await self._get(f"/api/annotation-tasks/{task_id}")
        return AnnotationTask.model_validate(response)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
class _AnnotationItemsAPI(_BaseAPI):
    """Item endpoints nested under ``/api/annotation-tasks/{task_id}/items``."""

    async def create(self, task_id: int, params: CreateAnnotationItemParams) -> AnnotationItem:
        """POST a new item to the given annotation task."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post(f"/api/annotation-tasks/{task_id}/items", json=body)
        return AnnotationItem.model_validate(response)

    async def list(
        self,
        task_id: int,
        params: ListAnnotationItemsParams | None = None,
    ) -> builtins.list[AnnotationItem]:
        """GET the items of a task (array, or object with rows under ``"data"``)."""
        filters = params or ListAnnotationItemsParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get(f"/api/annotation-tasks/{task_id}/items", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", [])
        return [AnnotationItem.model_validate(row) for row in rows]
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class DeveloperAPI(_BaseAPI):
    """Developer endpoints at ``/api/developer``; exposes ``.api_keys`` and ``.webhooks``."""

    def __init__(self, client: AIEvalClient) -> None:
        """Bind to ``client`` and attach the API-key and webhook sub-APIs."""
        super().__init__(client)
        self.api_keys = _APIKeysAPI(client)
        self.webhooks = _WebhooksAPI(client)

    async def get_usage(self, params: GetUsageParams) -> UsageStats:
        """GET usage statistics, passing ``params`` as the query string."""
        query = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/developer/usage", params=query)
        return UsageStats.model_validate(response)

    async def get_usage_summary(self, organization_id: int) -> UsageSummary:
        """GET the usage summary for one organization."""
        query = {"organizationId": organization_id}
        response = await self._get("/api/developer/usage/summary", params=query)
        return UsageSummary.model_validate(response)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
class _APIKeysAPI(_BaseAPI):
    """API-key management endpoints at ``/api/developer/api-keys``."""

    async def create(self, params: CreateAPIKeyParams) -> APIKeyWithSecret:
        """POST a new API key; the result is validated as ``APIKeyWithSecret``."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/developer/api-keys", json=body)
        return APIKeyWithSecret.model_validate(response)

    async def list(self, params: ListAPIKeysParams | None = None) -> builtins.list[APIKey]:
        """GET API keys (array, or object with ``"data"``/``"apiKeys"``)."""
        filters = params or ListAPIKeysParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/developer/api-keys", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", response.get("apiKeys", []))
        return [APIKey.model_validate(row) for row in rows]

    async def update(self, key_id: int, params: UpdateAPIKeyParams) -> APIKey:
        """PATCH an API key with the non-``None`` fields of ``params``."""
        changes = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._patch(f"/api/developer/api-keys/{key_id}", json=changes)
        return APIKey.model_validate(response)

    async def revoke(self, key_id: int) -> dict[str, str]:
        """DELETE an API key and return the raw response payload."""
        outcome = await self._delete(f"/api/developer/api-keys/{key_id}")
        return outcome

    async def get_usage(self, key_id: int) -> APIKeyUsage:
        """GET usage data for a single API key."""
        response = await self._get(f"/api/developer/api-keys/{key_id}/usage")
        return APIKeyUsage.model_validate(response)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
class _WebhooksAPI(_BaseAPI):
    """Webhook management endpoints at ``/api/developer/webhooks``."""

    async def create(self, params: CreateWebhookParams) -> Webhook:
        """POST a new webhook and return it validated."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/developer/webhooks", json=body)
        return Webhook.model_validate(response)

    async def list(self, params: ListWebhooksParams | None = None) -> builtins.list[Webhook]:
        """GET webhooks (array, or object with rows under ``"data"``)."""
        filters = params or ListWebhooksParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get("/api/developer/webhooks", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", [])
        return [Webhook.model_validate(row) for row in rows]

    async def get(self, webhook_id: int) -> Webhook:
        """GET a single webhook by id."""
        response = await self._get(f"/api/developer/webhooks/{webhook_id}")
        return Webhook.model_validate(response)

    async def update(self, webhook_id: int, params: UpdateWebhookParams) -> Webhook:
        """PATCH a webhook with the non-``None`` fields of ``params``."""
        changes = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._patch(f"/api/developer/webhooks/{webhook_id}", json=changes)
        return Webhook.model_validate(response)

    async def delete(self, webhook_id: int) -> dict[str, str]:
        """DELETE a webhook and return the raw response payload."""
        outcome = await self._delete(f"/api/developer/webhooks/{webhook_id}")
        return outcome

    async def get_deliveries(
        self, webhook_id: int, params: ListWebhookDeliveriesParams | None = None
    ) -> builtins.list[WebhookDelivery]:
        """GET a webhook's deliveries (array, or object with rows under ``"data"``)."""
        filters = params or ListWebhookDeliveriesParams()
        query = filters.model_dump(by_alias=True, exclude_none=True)
        response = await self._get(f"/api/developer/webhooks/{webhook_id}/deliveries", params=query)
        if isinstance(response, list):
            rows = response
        else:
            rows = response.get("data", [])
        return [WebhookDelivery.model_validate(row) for row in rows]
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
class OrganizationsAPI(_BaseAPI):
    """Organization endpoints at ``/api/organizations``."""

    async def get_current(self) -> Organization:
        """GET the current organization; unwraps an ``"organization"`` envelope when present."""
        response = await self._get("/api/organizations/current")
        if isinstance(response, dict):
            response = response.get("organization", response)
        return Organization.model_validate(response)
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
# ── Main client ──────────────────────────────────────────────────────
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
class AIEvalClient:
    """Async client for the EvalGate API.

    Usage::

        client = AIEvalClient(api_key="sk-...")

        # Or zero-config (reads EVALGATE_API_KEY env var)
        client = AIEvalClient.init()

        trace = await client.traces.create(CreateTraceParams(name="my-trace"))

    The underlying ``httpx.AsyncClient`` is created lazily on the first
    request. Release it with ``await client.close()`` or by using the client
    as an async context manager.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        organization_id: int | None = None,
        timeout: int = 30_000,
        debug: bool = False,
        **kwargs: Any,
    ) -> None:
        """Resolve configuration and wire up the API sub-modules.

        Settings are resolved in this order: explicit argument, environment
        variable (``EVALGATE_*`` with the deprecated ``EVALAI_*`` fallback),
        then the saved config file (``api_key`` and ``base_url`` only).

        Args:
            api_key: API key sent as a bearer token.
            base_url: API root; defaults to ``http://localhost:3000``. Trailing
                slashes are stripped.
            organization_id: Organization sent in ``X-Organization-Id``; falls
                back to ``EVALGATE_ORGANIZATION_ID`` parsed as an integer.
            timeout: Request timeout in milliseconds.
            debug: When true, every response status is logged at DEBUG level.
            **kwargs: Extra fields forwarded to ``ClientConfig``.

        Raises:
            EvalGateError: With code ``MISSING_API_KEY`` when no key is found.
        """
        file_cfg = _load_config_file()
        self._api_key = api_key or _env("EVALGATE_API_KEY", "EVALAI_API_KEY") or file_cfg.get("api_key", "")

        if not self._api_key:
            raise EvalGateError(
                "API key is required. Provide via api_key= or EVALGATE_API_KEY env var.",
                "MISSING_API_KEY",
                0,
            )
        self._base_url = (
            base_url
            or _env("EVALGATE_BASE_URL", "EVALAI_BASE_URL")
            or file_cfg.get("base_url")
            or "http://localhost:3000"
        ).rstrip("/")
        _org_env = _env("EVALGATE_ORGANIZATION_ID", "EVALAI_ORGANIZATION_ID")
        self._organization_id = organization_id or (int(_org_env) if _org_env else None)
        # httpx expects seconds; the public API takes milliseconds.
        self._timeout = timeout / 1000
        self._debug = debug
        self._config = ClientConfig(
            api_key=self._api_key,
            base_url=self._base_url,
            organization_id=self._organization_id,
            timeout=timeout,
            debug=debug,
            **kwargs,
        )
        self._http: httpx.AsyncClient | None = None

        # API sub-modules
        self.traces = TraceAPI(self)
        self.evaluations = EvaluationAPI(self)
        self.llm_judge = LLMJudgeAPI(self)
        self.annotations = AnnotationsAPI(self)
        self.developer = DeveloperAPI(self)
        self.organizations = OrganizationsAPI(self)

    @classmethod
    def init(cls, **kwargs: Any) -> AIEvalClient:
        """Zero-config factory — reads EVALGATE_API_KEY, EVALGATE_BASE_URL, EVALGATE_ORGANIZATION_ID."""
        return cls(**kwargs)

    @property
    def api_key(self) -> str:
        """Return the configured API key."""
        return self._api_key

    @property
    def organization_id(self) -> int | None:
        """Return the configured organization id, or ``None`` when unset."""
        return self._organization_id

    # ── HTTP layer ───────────────────────────────────────────────

    def _get_http(self) -> httpx.AsyncClient:
        """Return the shared ``httpx.AsyncClient``, (re)creating it if missing or closed."""
        if self._http is None or self._http.is_closed:
            headers: dict[str, str] = {
                "User-Agent": f"evalgate-python/{SDK_VERSION}",
                "Content-Type": "application/json",
                "X-EvalGate-SDK-Version": SDK_VERSION,
                "X-EvalGate-Spec-Version": SPEC_VERSION,
            }
            if self._api_key:
                headers["Authorization"] = f"Bearer {self._api_key}"
            if self._organization_id is not None:
                headers["X-Organization-Id"] = str(self._organization_id)

            self._http = httpx.AsyncClient(
                base_url=self._base_url,
                headers=headers,
                timeout=httpx.Timeout(self._timeout),
            )
        return self._http

    async def _request(
        self,
        method: str,
        path: str,
        *,
        params: dict[str, Any] | None = None,
        json: dict[str, Any] | None = None,
    ) -> Any:
        """Send one API request with retries and return the decoded JSON body.

        Args:
            method: HTTP verb.
            path: Path relative to the configured base URL.
            params: Optional query parameters.
            json: Optional JSON request body.

        Returns:
            The decoded JSON payload, or ``{}`` when the response has no body
            (status 204 or an empty body on any other success status).

        Raises:
            EvalGateError: For HTTP error statuses (built by
                ``create_error_from_response``) and for timeouts (code
                ``TIMEOUT``) once attempts are exhausted.
            NetworkError: For other ``httpx`` transport errors once attempts
                are exhausted.

        Retry policy: error responses whose ``should_retry()`` is true wait
        ``0.5 * 2**(attempt-1)`` seconds (or the error's ``retry_after`` when
        set); timeouts and transport errors wait ``2**(attempt-1)`` seconds.
        """
        import asyncio

        # Always make at least one attempt: with a configured value <= 0 the
        # loop would otherwise never run and no request would be sent at all.
        max_attempts = max(1, self._config.retry.max_attempts)
        last_error: Exception | None = None

        for attempt in range(1, max_attempts + 1):
            try:
                http = self._get_http()
                resp = await http.request(method, path, params=params, json=json)

                if self._debug:
                    logger.debug("%s %s → %s", method, path, resp.status_code)

                if resp.status_code >= 400:
                    # Error bodies are not guaranteed to be JSON; fall back to text.
                    try:
                        data = resp.json()
                    except Exception:
                        data = resp.text
                    err = create_error_from_response(resp.status_code, data)
                    if err.should_retry() and attempt < max_attempts:
                        wait = (2 ** (attempt - 1)) * 0.5
                        if err.retry_after:
                            # A server-provided delay overrides the backoff.
                            wait = err.retry_after
                        logger.warning("Retrying %s %s (attempt %d) in %.1fs", method, path, attempt, wait)
                        await asyncio.sleep(wait)
                        last_error = err
                        continue
                    raise err

                # A success status without a body (204, or e.g. an empty 200/202)
                # has nothing to decode; calling resp.json() would raise.
                if resp.status_code == 204 or not resp.content:
                    return {}
                return resp.json()

            except EvalGateError:
                # Already in its final form (raised above); never retried here.
                raise
            except httpx.TimeoutException as exc:
                last_error = EvalGateError(str(exc), "TIMEOUT", 408)
                if attempt < max_attempts:
                    await asyncio.sleep(2 ** (attempt - 1))
                    continue
                raise last_error from exc
            except httpx.HTTPError as exc:
                last_error = NetworkError(str(exc))
                if attempt < max_attempts:
                    await asyncio.sleep(2 ** (attempt - 1))
                    continue
                raise last_error from exc

        # Defensive: every loop iteration returns, raises or continues.
        raise last_error or NetworkError("Request failed after retries")

    async def get_quality(
        self,
        evaluation_id: int,
        *,
        baseline: str = "published",
    ) -> QualityScore:
        """Fetch the latest quality score for an evaluation."""
        data = await self._request(
            "GET",
            "/api/quality",
            params={
                "evaluationId": str(evaluation_id),
                "action": "latest",
                "baseline": baseline,
            },
        )
        return QualityScore.model_validate(data)

    async def get_run_export(
        self,
        evaluation_id: int,
        run_id: int,
    ) -> dict[str, Any]:
        """Fetch structured export data for a run."""
        return await self._request(
            "GET",
            f"/api/evaluations/{evaluation_id}/runs/{run_id}/export",
        )

    async def publish_share(
        self,
        evaluation_id: int,
        export_data: dict[str, Any],
        evaluation_run_id: int,
        *,
        expires_in_days: int | None = None,
    ) -> dict[str, Any]:
        """Publish a share link for a run.

        ``expiresInDays`` is only included in the request body when
        ``expires_in_days`` is not ``None``.
        """
        body: dict[str, Any] = {
            "exportData": export_data,
            "shareScope": "run",
            "evaluationRunId": evaluation_run_id,
        }
        if expires_in_days is not None:
            body["expiresInDays"] = expires_in_days
        return await self._request(
            "POST",
            f"/api/evaluations/{evaluation_id}/publish",
            json=body,
        )

    async def close(self) -> None:
        """Close the underlying HTTP client if one is open."""
        if self._http and not self._http.is_closed:
            await self._http.aclose()

    async def __aenter__(self) -> AIEvalClient:
        """Enter the async context; returns the client itself."""
        return self

    async def __aexit__(self, *args: Any) -> None:
        """Exit the async context, closing the HTTP client."""
        await self.close()
|