evalgate-sdk 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. evalgate_sdk/__init__.py +707 -0
  2. evalgate_sdk/_version.py +3 -0
  3. evalgate_sdk/assertions.py +1362 -0
  4. evalgate_sdk/auto.py +247 -0
  5. evalgate_sdk/batch.py +174 -0
  6. evalgate_sdk/cache.py +111 -0
  7. evalgate_sdk/ci_context.py +123 -0
  8. evalgate_sdk/cli/__init__.py +111 -0
  9. evalgate_sdk/cli/api.py +261 -0
  10. evalgate_sdk/cli/cli_constants.py +20 -0
  11. evalgate_sdk/cli/commands.py +1041 -0
  12. evalgate_sdk/cli/config.py +228 -0
  13. evalgate_sdk/cli/env.py +43 -0
  14. evalgate_sdk/cli/formatters/types.py +132 -0
  15. evalgate_sdk/cli/golden_commands.py +322 -0
  16. evalgate_sdk/cli/manifest.py +301 -0
  17. evalgate_sdk/cli/new_commands.py +435 -0
  18. evalgate_sdk/cli/policy_packs.py +103 -0
  19. evalgate_sdk/cli/profiles.py +12 -0
  20. evalgate_sdk/cli/regression_gate.py +312 -0
  21. evalgate_sdk/cli/render/__init__.py +1 -0
  22. evalgate_sdk/cli/render/snippet.py +18 -0
  23. evalgate_sdk/cli/render/sort.py +29 -0
  24. evalgate_sdk/cli/report/__init__.py +1 -0
  25. evalgate_sdk/cli/report/build_check_report.py +209 -0
  26. evalgate_sdk/cli/traces.py +186 -0
  27. evalgate_sdk/cli/workspace.py +63 -0
  28. evalgate_sdk/client.py +609 -0
  29. evalgate_sdk/cluster.py +359 -0
  30. evalgate_sdk/collector.py +161 -0
  31. evalgate_sdk/constants.py +6 -0
  32. evalgate_sdk/context.py +151 -0
  33. evalgate_sdk/errors.py +236 -0
  34. evalgate_sdk/export.py +238 -0
  35. evalgate_sdk/formatters/__init__.py +11 -0
  36. evalgate_sdk/formatters/github.py +51 -0
  37. evalgate_sdk/formatters/human.py +68 -0
  38. evalgate_sdk/formatters/json_fmt.py +11 -0
  39. evalgate_sdk/formatters/pr_comment.py +80 -0
  40. evalgate_sdk/golden.py +426 -0
  41. evalgate_sdk/integrations/__init__.py +1 -0
  42. evalgate_sdk/integrations/anthropic.py +99 -0
  43. evalgate_sdk/integrations/autogen.py +62 -0
  44. evalgate_sdk/integrations/crewai.py +61 -0
  45. evalgate_sdk/integrations/langchain.py +100 -0
  46. evalgate_sdk/integrations/openai.py +155 -0
  47. evalgate_sdk/integrations/openai_eval.py +221 -0
  48. evalgate_sdk/local.py +144 -0
  49. evalgate_sdk/logger.py +123 -0
  50. evalgate_sdk/matchers.py +62 -0
  51. evalgate_sdk/otel.py +256 -0
  52. evalgate_sdk/pagination.py +145 -0
  53. evalgate_sdk/py.typed +0 -0
  54. evalgate_sdk/pytest_plugin.py +96 -0
  55. evalgate_sdk/reason_codes.py +103 -0
  56. evalgate_sdk/regression.py +196 -0
  57. evalgate_sdk/replay_decision.py +115 -0
  58. evalgate_sdk/runtime/__init__.py +50 -0
  59. evalgate_sdk/runtime/adapters/__init__.py +1 -0
  60. evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
  61. evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
  62. evalgate_sdk/runtime/context.py +68 -0
  63. evalgate_sdk/runtime/eval.py +318 -0
  64. evalgate_sdk/runtime/execution_mode.py +170 -0
  65. evalgate_sdk/runtime/executor.py +92 -0
  66. evalgate_sdk/runtime/registry.py +125 -0
  67. evalgate_sdk/runtime/run_report.py +249 -0
  68. evalgate_sdk/runtime/types.py +143 -0
  69. evalgate_sdk/snapshot.py +219 -0
  70. evalgate_sdk/streaming.py +124 -0
  71. evalgate_sdk/synthesize.py +226 -0
  72. evalgate_sdk/testing.py +128 -0
  73. evalgate_sdk/types.py +666 -0
  74. evalgate_sdk/utils/__init__.py +1 -0
  75. evalgate_sdk/utils/input_hash.py +42 -0
  76. evalgate_sdk/workflows.py +264 -0
  77. evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
  78. evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
  79. evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
  80. evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
evalgate_sdk/client.py ADDED
@@ -0,0 +1,609 @@
1
+ """AIEvalClient — async HTTP client for the EvalGate API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import builtins
6
+ import logging
7
+ import os
8
+ from typing import Any, TypeVar
9
+
10
+ import httpx
11
+
12
+ from evalgate_sdk._version import SDK_VERSION, SPEC_VERSION
13
+ from evalgate_sdk.errors import (
14
+ EvalGateError,
15
+ NetworkError,
16
+ create_error_from_response,
17
+ )
18
+ from evalgate_sdk.types import (
19
+ Annotation,
20
+ AnnotationItem,
21
+ AnnotationTask,
22
+ APIKey,
23
+ APIKeyUsage,
24
+ APIKeyWithSecret,
25
+ ClientConfig,
26
+ CreateAnnotationItemParams,
27
+ CreateAnnotationParams,
28
+ CreateAnnotationTaskParams,
29
+ CreateAPIKeyParams,
30
+ CreateEvaluationParams,
31
+ CreateLLMJudgeConfigParams,
32
+ CreateRunParams,
33
+ CreateSpanParams,
34
+ CreateTestCaseParams,
35
+ CreateTraceParams,
36
+ CreateWebhookParams,
37
+ Evaluation,
38
+ EvaluationRun,
39
+ GetLLMJudgeAlignmentParams,
40
+ GetUsageParams,
41
+ ListAnnotationItemsParams,
42
+ ListAnnotationsParams,
43
+ ListAnnotationTasksParams,
44
+ ListAPIKeysParams,
45
+ ListEvaluationsParams,
46
+ ListLLMJudgeConfigsParams,
47
+ ListLLMJudgeResultsParams,
48
+ ListTracesParams,
49
+ ListWebhookDeliveriesParams,
50
+ ListWebhooksParams,
51
+ LLMJudgeAlignment,
52
+ LLMJudgeConfig,
53
+ LLMJudgeResult,
54
+ Organization,
55
+ QualityScore,
56
+ RunLLMJudgeParams,
57
+ Span,
58
+ TestCase,
59
+ Trace,
60
+ UpdateAPIKeyParams,
61
+ UpdateEvaluationParams,
62
+ UpdateTraceParams,
63
+ UpdateWebhookParams,
64
+ UsageStats,
65
+ UsageSummary,
66
+ Webhook,
67
+ WebhookDelivery,
68
+ )
69
+
70
+ logger = logging.getLogger("evalgate_sdk")
71
+
72
+ T = TypeVar("T")
73
+
74
+
75
+ _LEGACY_WARNED: set[str] = set()
76
+
77
+
78
def _env(name: str, legacy: str | None = None) -> str | None:
    """Read an environment variable, optionally falling back to a legacy name.

    Args:
        name: Preferred environment variable name.
        legacy: Deprecated variable name consulted only when ``name`` is unset
            or empty.

    Returns:
        The non-empty value of ``name`` if present; otherwise whatever
        ``os.environ.get(legacy)`` yields (which may be ``None`` or an empty
        string); ``None`` when no legacy name was supplied.
    """
    preferred = os.environ.get(name)
    if preferred:
        return preferred

    # No fallback requested: nothing more to look at.
    if not legacy:
        return None

    legacy_val = os.environ.get(legacy)
    already_warned = legacy in _LEGACY_WARNED if legacy_val else True
    if not already_warned:
        import warnings

        # Warn once per legacy variable for the lifetime of the process.
        warnings.warn(
            f"[EvalGate] Deprecation: {legacy} is deprecated. Use {name} instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        _LEGACY_WARNED.add(legacy)
    return legacy_val
95
+
96
+
97
def _load_config_file() -> dict[str, Any]:
    """Load saved config from .evalgate/config.json or .evalai/config.json if it exists.

    The search starts in the current working directory and walks up through
    every parent directory; within each directory ``.evalgate`` is tried
    before the legacy ``.evalai``.

    Returns:
        The parsed JSON object of the first readable config file, ``{}`` if
        that file's top-level JSON value is not an object, or ``{}`` when no
        readable config file is found.
    """
    import json
    import warnings
    from pathlib import Path

    cwd = Path.cwd()
    for parent in (cwd, *cwd.parents):
        for cfg_dir in (".evalgate", ".evalai"):
            config_path = parent / cfg_dir / "config.json"
            if not config_path.exists():
                continue
            # Only the read/parse step is guarded: an unreadable or malformed
            # file is skipped (best-effort), but nothing else is swallowed.
            try:
                data = json.loads(config_path.read_text(encoding="utf-8"))
            except (OSError, ValueError) as exc:
                logging.getLogger("evalgate_sdk").debug(
                    "Ignoring unreadable config file %s: %s", config_path, exc
                )
                continue
            # Emitted outside the try so that warnings promoted to errors
            # (e.g. ``-W error``) surface instead of being silently dropped.
            if cfg_dir == ".evalai" and ".evalai" not in _LEGACY_WARNED:
                warnings.warn(
                    "[EvalGate] Deprecation: .evalai/ config is deprecated. "
                    "Migrate to .evalgate/ (e.g. mv .evalai .evalgate).",
                    DeprecationWarning,
                    stacklevel=2,
                )
                _LEGACY_WARNED.add(".evalai")
            return data if isinstance(data, dict) else {}
    return {}
121
+
122
+
123
class _BaseAPI:
    """Common base for API sub-modules: thin per-verb wrappers over the client.

    Every helper forwards to ``client._request`` with the matching HTTP verb
    and returns whatever that call returns.
    """

    def __init__(self, client: AIEvalClient) -> None:
        # Keep a handle on the owning client; all traffic goes through it.
        self._c = client

    async def _get(self, path: str, params: dict[str, Any] | None = None) -> Any:
        """Issue a GET to ``path`` with optional query ``params``."""
        result = await self._c._request("GET", path, params=params)
        return result

    async def _post(self, path: str, json: dict[str, Any] | None = None) -> Any:
        """Issue a POST to ``path`` with an optional JSON body."""
        result = await self._c._request("POST", path, json=json)
        return result

    async def _patch(self, path: str, json: dict[str, Any] | None = None) -> Any:
        """Issue a PATCH to ``path`` with an optional JSON body."""
        result = await self._c._request("PATCH", path, json=json)
        return result

    async def _put(self, path: str, json: dict[str, Any] | None = None) -> Any:
        """Issue a PUT to ``path`` with an optional JSON body."""
        result = await self._c._request("PUT", path, json=json)
        return result

    async def _delete(self, path: str) -> Any:
        """Issue a DELETE to ``path``."""
        result = await self._c._request("DELETE", path)
        return result
143
+
144
+
145
+ # ── API sub-modules ──────────────────────────────────────────────────
146
+
147
+
148
class TraceAPI(_BaseAPI):
    """Trace and span operations under ``/api/traces``."""

    async def create(self, params: CreateTraceParams) -> Trace:
        """Create a trace from ``params`` and return it validated as ``Trace``."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post("/api/traces", json=body)
        return Trace.model_validate(created)

    async def list(self, params: ListTracesParams | None = None) -> builtins.list[Trace]:
        """List traces; a default ``ListTracesParams`` is used when none is given.

        The response may be a bare list, or an object carrying the rows under
        ``data`` (preferred) or ``traces``.
        """
        if not params:
            params = ListTracesParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        payload = await self._get("/api/traces", params=query)
        if isinstance(payload, list):
            rows = payload
        else:
            rows = payload.get("data", payload.get("traces", []))
        return [Trace.model_validate(row) for row in rows]

    async def get(self, trace_id: int) -> Trace:
        """Fetch one trace by id."""
        fetched = await self._get(f"/api/traces/{trace_id}")
        return Trace.model_validate(fetched)

    async def update(self, trace_id: int, params: UpdateTraceParams) -> Trace:
        """PATCH a trace with the non-``None`` fields of ``params``."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        updated = await self._patch(f"/api/traces/{trace_id}", json=body)
        return Trace.model_validate(updated)

    async def delete(self, trace_id: int) -> dict[str, str]:
        """Delete a trace and return the raw response body."""
        response = await self._delete(f"/api/traces/{trace_id}")
        return response

    async def create_span(self, trace_id: int, params: CreateSpanParams) -> Span:
        """Create a span under the given trace."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post(f"/api/traces/{trace_id}/spans", json=body)
        return Span.model_validate(created)

    async def list_spans(self, trace_id: int) -> builtins.list[Span]:
        """List spans of a trace (bare list, or rows under ``data``/``spans``)."""
        payload = await self._get(f"/api/traces/{trace_id}/spans")
        if isinstance(payload, list):
            rows = payload
        else:
            rows = payload.get("data", payload.get("spans", []))
        return [Span.model_validate(row) for row in rows]
179
+
180
+
181
class EvaluationAPI(_BaseAPI):
    """Evaluation, test-case and run operations under ``/api/evaluations``."""

    async def create(self, params: CreateEvaluationParams) -> Evaluation:
        """Create an evaluation from ``params``."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post("/api/evaluations", json=body)
        return Evaluation.model_validate(created)

    async def get(self, evaluation_id: int) -> Evaluation:
        """Fetch one evaluation; unwraps an ``evaluation`` envelope if present."""
        raw = await self._get(f"/api/evaluations/{evaluation_id}")
        record = raw
        if isinstance(raw, dict):
            record = raw.get("evaluation", raw)
        return Evaluation.model_validate(record)

    async def list(self, params: ListEvaluationsParams | None = None) -> builtins.list[Evaluation]:
        """List evaluations (bare list, or rows under ``data``/``evaluations``)."""
        if not params:
            params = ListEvaluationsParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get("/api/evaluations", params=query)
        if isinstance(raw, list):
            rows = raw
        else:
            rows = raw.get("data", raw.get("evaluations", []))
        return [Evaluation.model_validate(row) for row in rows]

    async def update(self, evaluation_id: int, params: UpdateEvaluationParams) -> Evaluation:
        """PATCH an evaluation; unwraps an ``evaluation`` envelope if present."""
        changes = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._patch(f"/api/evaluations/{evaluation_id}", json=changes)
        record = raw
        if isinstance(raw, dict):
            record = raw.get("evaluation", raw)
        return Evaluation.model_validate(record)

    async def delete(self, evaluation_id: int) -> dict[str, str]:
        """Delete an evaluation and return the raw response body."""
        response = await self._delete(f"/api/evaluations/{evaluation_id}")
        return response

    async def create_test_case(self, evaluation_id: int, params: CreateTestCaseParams) -> TestCase:
        """Add a test case to an evaluation."""
        endpoint = f"/api/evaluations/{evaluation_id}/test-cases"
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post(endpoint, json=body)
        return TestCase.model_validate(created)

    async def list_test_cases(self, evaluation_id: int) -> builtins.list[TestCase]:
        """List test cases (bare list, or rows under ``data``/``testCases``)."""
        raw = await self._get(f"/api/evaluations/{evaluation_id}/test-cases")
        if isinstance(raw, list):
            rows = raw
        else:
            rows = raw.get("data", raw.get("testCases", []))
        return [TestCase.model_validate(row) for row in rows]

    async def create_run(self, evaluation_id: int, params: CreateRunParams | None = None) -> EvaluationRun:
        """Start a run; a default ``CreateRunParams`` is used when none is given."""
        if not params:
            params = CreateRunParams()
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post(f"/api/evaluations/{evaluation_id}/runs", json=body)
        return EvaluationRun.model_validate(created)

    async def list_runs(self, evaluation_id: int) -> builtins.list[EvaluationRun]:
        """List runs of an evaluation (bare list, or rows under ``data``/``runs``)."""
        raw = await self._get(f"/api/evaluations/{evaluation_id}/runs")
        if isinstance(raw, list):
            rows = raw
        else:
            rows = raw.get("data", raw.get("runs", []))
        return [EvaluationRun.model_validate(row) for row in rows]

    async def get_run(self, evaluation_id: int, run_id: int) -> EvaluationRun:
        """Fetch a single run of an evaluation."""
        fetched = await self._get(f"/api/evaluations/{evaluation_id}/runs/{run_id}")
        return EvaluationRun.model_validate(fetched)
231
+
232
+
233
class LLMJudgeAPI(_BaseAPI):
    """LLM-judge operations under ``/api/llm-judge``."""

    async def evaluate(self, params: RunLLMJudgeParams) -> dict[str, Any]:
        """Run a judge evaluation and return the raw response body."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        response = await self._post("/api/llm-judge/evaluate", json=body)
        return response

    async def create_config(self, params: CreateLLMJudgeConfigParams) -> LLMJudgeConfig:
        """Create a judge configuration."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post("/api/llm-judge/configs", json=body)
        return LLMJudgeConfig.model_validate(created)

    async def list_configs(self, params: ListLLMJudgeConfigsParams | None = None) -> list[LLMJudgeConfig]:
        """List judge configurations (bare list, or rows under ``data``)."""
        if not params:
            params = ListLLMJudgeConfigsParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get("/api/llm-judge/configs", params=query)
        rows = raw if isinstance(raw, list) else raw.get("data", [])
        return [LLMJudgeConfig.model_validate(row) for row in rows]

    async def list_results(self, params: ListLLMJudgeResultsParams | None = None) -> list[LLMJudgeResult]:
        """List judge results (bare list, or rows under ``data``)."""
        if not params:
            params = ListLLMJudgeResultsParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get("/api/llm-judge/results", params=query)
        rows = raw if isinstance(raw, list) else raw.get("data", [])
        return [LLMJudgeResult.model_validate(row) for row in rows]

    async def get_alignment(self, params: GetLLMJudgeAlignmentParams) -> LLMJudgeAlignment:
        """Fetch alignment for a config; only ``params.config_id`` is sent (in the URL)."""
        endpoint = f"/api/llm-judge/configs/{params.config_id}/alignment"
        fetched = await self._get(endpoint)
        return LLMJudgeAlignment.model_validate(fetched)
256
+
257
+
258
class AnnotationsAPI(_BaseAPI):
    """Annotation operations under ``/api/annotations``; tasks live on ``.tasks``."""

    def __init__(self, client: AIEvalClient) -> None:
        super().__init__(client)
        # Nested sub-module sharing the same client.
        self.tasks = _AnnotationTasksAPI(client)

    async def create(self, params: CreateAnnotationParams) -> Annotation:
        """Create an annotation; unwraps an ``annotation`` envelope if present."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._post("/api/annotations", json=body)
        record = raw
        if isinstance(raw, dict):
            record = raw.get("annotation", raw)
        return Annotation.model_validate(record)

    async def list(self, params: ListAnnotationsParams | None = None) -> builtins.list[Annotation]:
        """List annotations (rows under ``annotations`` for an object response)."""
        if not params:
            params = ListAnnotationsParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get("/api/annotations", params=query)
        if isinstance(raw, dict):
            rows = raw.get("annotations", [])
        else:
            rows = raw
        return [Annotation.model_validate(row) for row in rows]
273
+
274
+
275
class _AnnotationTasksAPI(_BaseAPI):
    """Annotation-task operations under ``/api/annotation-tasks``; items live on ``.items``."""

    def __init__(self, client: AIEvalClient) -> None:
        super().__init__(client)
        # Nested sub-module sharing the same client.
        self.items = _AnnotationItemsAPI(client)

    async def create(self, params: CreateAnnotationTaskParams) -> AnnotationTask:
        """Create an annotation task."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post("/api/annotation-tasks", json=body)
        return AnnotationTask.model_validate(created)

    async def list(self, params: ListAnnotationTasksParams | None = None) -> builtins.list[AnnotationTask]:
        """List annotation tasks (bare list, or rows under ``data``)."""
        if not params:
            params = ListAnnotationTasksParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get("/api/annotation-tasks", params=query)
        rows = raw if isinstance(raw, list) else raw.get("data", [])
        return [AnnotationTask.model_validate(row) for row in rows]

    async def get(self, task_id: int) -> AnnotationTask:
        """Fetch one annotation task by id."""
        fetched = await self._get(f"/api/annotation-tasks/{task_id}")
        return AnnotationTask.model_validate(fetched)
293
+
294
+
295
class _AnnotationItemsAPI(_BaseAPI):
    """Item operations under ``/api/annotation-tasks/{task_id}/items``."""

    async def create(self, task_id: int, params: CreateAnnotationItemParams) -> AnnotationItem:
        """Create an item inside the given annotation task."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post(f"/api/annotation-tasks/{task_id}/items", json=body)
        return AnnotationItem.model_validate(created)

    async def list(
        self,
        task_id: int,
        params: ListAnnotationItemsParams | None = None,
    ) -> builtins.list[AnnotationItem]:
        """List items of a task (bare list, or rows under ``data``)."""
        if not params:
            params = ListAnnotationItemsParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get(f"/api/annotation-tasks/{task_id}/items", params=query)
        rows = raw if isinstance(raw, list) else raw.get("data", [])
        return [AnnotationItem.model_validate(row) for row in rows]
310
+
311
+
312
class DeveloperAPI(_BaseAPI):
    """Developer operations under ``/api/developer``; exposes ``.api_keys`` and ``.webhooks``."""

    def __init__(self, client: AIEvalClient) -> None:
        super().__init__(client)
        # Nested sub-modules sharing the same client.
        self.api_keys = _APIKeysAPI(client)
        self.webhooks = _WebhooksAPI(client)

    async def get_usage(self, params: GetUsageParams) -> UsageStats:
        """Fetch usage statistics, sending ``params`` as the query string."""
        query = params.model_dump(by_alias=True, exclude_none=True)
        fetched = await self._get("/api/developer/usage", params=query)
        return UsageStats.model_validate(fetched)

    async def get_usage_summary(self, organization_id: int) -> UsageSummary:
        """Fetch the usage summary for one organization."""
        query = {"organizationId": organization_id}
        fetched = await self._get("/api/developer/usage/summary", params=query)
        return UsageSummary.model_validate(fetched)
325
+
326
+
327
class _APIKeysAPI(_BaseAPI):
    """API-key operations under ``/api/developer/api-keys``."""

    async def create(self, params: CreateAPIKeyParams) -> APIKeyWithSecret:
        """Create an API key; the response is validated as ``APIKeyWithSecret``."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post("/api/developer/api-keys", json=body)
        return APIKeyWithSecret.model_validate(created)

    async def list(self, params: ListAPIKeysParams | None = None) -> builtins.list[APIKey]:
        """List API keys (bare list, or rows under ``data``/``apiKeys``)."""
        if not params:
            params = ListAPIKeysParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get("/api/developer/api-keys", params=query)
        if isinstance(raw, list):
            rows = raw
        else:
            rows = raw.get("data", raw.get("apiKeys", []))
        return [APIKey.model_validate(row) for row in rows]

    async def update(self, key_id: int, params: UpdateAPIKeyParams) -> APIKey:
        """PATCH an API key with the non-``None`` fields of ``params``."""
        changes = params.model_dump(by_alias=True, exclude_none=True)
        updated = await self._patch(f"/api/developer/api-keys/{key_id}", json=changes)
        return APIKey.model_validate(updated)

    async def revoke(self, key_id: int) -> dict[str, str]:
        """Revoke (DELETE) an API key and return the raw response body."""
        response = await self._delete(f"/api/developer/api-keys/{key_id}")
        return response

    async def get_usage(self, key_id: int) -> APIKeyUsage:
        """Fetch usage for one API key."""
        fetched = await self._get(f"/api/developer/api-keys/{key_id}/usage")
        return APIKeyUsage.model_validate(fetched)
349
+
350
+
351
class _WebhooksAPI(_BaseAPI):
    """Webhook operations under ``/api/developer/webhooks``."""

    async def create(self, params: CreateWebhookParams) -> Webhook:
        """Create a webhook."""
        body = params.model_dump(by_alias=True, exclude_none=True)
        created = await self._post("/api/developer/webhooks", json=body)
        return Webhook.model_validate(created)

    async def list(self, params: ListWebhooksParams | None = None) -> builtins.list[Webhook]:
        """List webhooks (bare list, or rows under ``data``)."""
        if not params:
            params = ListWebhooksParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get("/api/developer/webhooks", params=query)
        rows = raw if isinstance(raw, list) else raw.get("data", [])
        return [Webhook.model_validate(row) for row in rows]

    async def get(self, webhook_id: int) -> Webhook:
        """Fetch one webhook by id."""
        fetched = await self._get(f"/api/developer/webhooks/{webhook_id}")
        return Webhook.model_validate(fetched)

    async def update(self, webhook_id: int, params: UpdateWebhookParams) -> Webhook:
        """PATCH a webhook with the non-``None`` fields of ``params``."""
        changes = params.model_dump(by_alias=True, exclude_none=True)
        updated = await self._patch(f"/api/developer/webhooks/{webhook_id}", json=changes)
        return Webhook.model_validate(updated)

    async def delete(self, webhook_id: int) -> dict[str, str]:
        """Delete a webhook and return the raw response body."""
        response = await self._delete(f"/api/developer/webhooks/{webhook_id}")
        return response

    async def get_deliveries(
        self, webhook_id: int, params: ListWebhookDeliveriesParams | None = None
    ) -> builtins.list[WebhookDelivery]:
        """List deliveries of a webhook (bare list, or rows under ``data``)."""
        if not params:
            params = ListWebhookDeliveriesParams()
        query = params.model_dump(by_alias=True, exclude_none=True)
        raw = await self._get(f"/api/developer/webhooks/{webhook_id}/deliveries", params=query)
        rows = raw if isinstance(raw, list) else raw.get("data", [])
        return [WebhookDelivery.model_validate(row) for row in rows]
381
+
382
+
383
class OrganizationsAPI(_BaseAPI):
    """Organization operations under ``/api/organizations``."""

    async def get_current(self) -> Organization:
        """Fetch the current organization; unwraps an ``organization`` envelope if present."""
        raw = await self._get("/api/organizations/current")
        record = raw
        if isinstance(raw, dict):
            record = raw.get("organization", raw)
        return Organization.model_validate(record)
388
+
389
+
390
+ # ── Main client ──────────────────────────────────────────────────────
391
+
392
+
393
class AIEvalClient:
    """Async client for the EvalGate API.

    Usage::

        client = AIEvalClient(api_key="sk-...")

        # Or zero-config (reads EVALGATE_API_KEY env var)
        client = AIEvalClient.init()

        trace = await client.traces.create(CreateTraceParams(name="my-trace"))

    The client can also be used as an async context manager, which closes the
    underlying HTTP connection pool on exit.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        organization_id: int | None = None,
        timeout: int = 30_000,
        debug: bool = False,
        **kwargs: Any,
    ) -> None:
        """Resolve configuration and build the API sub-modules.

        Each setting is resolved in order: explicit argument, environment
        variable (``EVALGATE_*`` with legacy ``EVALAI_*`` fallback), then the
        saved config file (API key and base URL only).

        Args:
            api_key: API key sent as a bearer token.
            base_url: API root; defaults to ``http://localhost:3000``.
            organization_id: Organization sent in the ``X-Organization-Id``
                header when not ``None``.
            timeout: Request timeout in milliseconds.
            debug: When true, each request/response status is logged at DEBUG.
            **kwargs: Forwarded to ``ClientConfig``.

        Raises:
            EvalGateError: If no API key could be resolved.
            ValueError: If the organization id from the environment is not an
                integer.
        """
        file_cfg = _load_config_file()
        self._api_key = api_key or _env("EVALGATE_API_KEY", "EVALAI_API_KEY") or file_cfg.get("api_key", "")

        if not self._api_key:
            raise EvalGateError(
                "API key is required. Provide via api_key= or EVALGATE_API_KEY env var.",
                "MISSING_API_KEY",
                0,
            )
        self._base_url = (
            base_url
            or _env("EVALGATE_BASE_URL", "EVALAI_BASE_URL")
            or file_cfg.get("base_url")
            or "http://localhost:3000"
        ).rstrip("/")

        _org_env = _env("EVALGATE_ORGANIZATION_ID", "EVALAI_ORGANIZATION_ID")
        # ``is None`` (not truthiness) so an explicit organization_id of 0 is
        # honoured rather than silently replaced by the environment value.
        if organization_id is None and _org_env:
            try:
                organization_id = int(_org_env)
            except ValueError:
                raise ValueError(
                    f"Organization ID from the environment must be an integer, got {_org_env!r}"
                ) from None
        self._organization_id = organization_id

        self._timeout = timeout / 1000  # httpx expects seconds
        self._debug = debug
        self._config = ClientConfig(
            api_key=self._api_key,
            base_url=self._base_url,
            organization_id=self._organization_id,
            timeout=timeout,
            debug=debug,
            **kwargs,
        )
        # Created lazily by _get_http() on first request.
        self._http: httpx.AsyncClient | None = None

        # API sub-modules
        self.traces = TraceAPI(self)
        self.evaluations = EvaluationAPI(self)
        self.llm_judge = LLMJudgeAPI(self)
        self.annotations = AnnotationsAPI(self)
        self.developer = DeveloperAPI(self)
        self.organizations = OrganizationsAPI(self)

    @classmethod
    def init(cls, **kwargs: Any) -> AIEvalClient:
        """Zero-config factory — reads EVALGATE_API_KEY, EVALGATE_BASE_URL, EVALGATE_ORGANIZATION_ID."""
        return cls(**kwargs)

    @property
    def api_key(self) -> str:
        """Return the configured API key."""
        return self._api_key

    @property
    def organization_id(self) -> int | None:
        """Return the configured organization id, or ``None`` if unset."""
        return self._organization_id

    # ── HTTP layer ───────────────────────────────────────────────

    def _get_http(self) -> httpx.AsyncClient:
        """Return the shared ``httpx.AsyncClient``, (re)creating it if missing or closed."""
        if self._http is None or self._http.is_closed:
            headers: dict[str, str] = {
                "User-Agent": f"evalgate-python/{SDK_VERSION}",
                "Content-Type": "application/json",
                "X-EvalGate-SDK-Version": SDK_VERSION,
                "X-EvalGate-Spec-Version": SPEC_VERSION,
            }
            if self._api_key:
                headers["Authorization"] = f"Bearer {self._api_key}"
            if self._organization_id is not None:
                headers["X-Organization-Id"] = str(self._organization_id)

            self._http = httpx.AsyncClient(
                base_url=self._base_url,
                headers=headers,
                timeout=httpx.Timeout(self._timeout),
            )
        return self._http

    async def _request(
        self,
        method: str,
        path: str,
        *,
        params: dict[str, Any] | None = None,
        json: dict[str, Any] | None = None,
    ) -> Any:
        """Send one API request, retrying per ``self._config.retry.max_attempts``.

        Args:
            method: HTTP verb.
            path: Path relative to the client's base URL.
            params: Optional query parameters.
            json: Optional JSON request body.

        Returns:
            The decoded JSON body, or ``{}`` for a 204 or an empty success body.

        Raises:
            EvalGateError: For HTTP status >= 400 (as built by
                ``create_error_from_response``) or a timeout after the last
                attempt.
            NetworkError: For other ``httpx`` errors after the last attempt.
        """
        import asyncio  # local import: not part of this module's top-level imports

        max_attempts = self._config.retry.max_attempts
        last_error: Exception | None = None

        for attempt in range(1, max_attempts + 1):
            try:
                http = self._get_http()
                resp = await http.request(method, path, params=params, json=json)

                if self._debug:
                    logger.debug("%s %s → %s", method, path, resp.status_code)

                if resp.status_code >= 400:
                    try:
                        data = resp.json()
                    except ValueError:
                        # Non-JSON error body: pass the raw text through.
                        data = resp.text
                    err = create_error_from_response(resp.status_code, data)
                    if err.should_retry() and attempt < max_attempts:
                        # Exponential backoff unless the error carries its own delay.
                        wait = (2 ** (attempt - 1)) * 0.5
                        if err.retry_after:
                            wait = err.retry_after
                        logger.warning("Retrying %s %s (attempt %d) in %.1fs", method, path, attempt, wait)
                        await asyncio.sleep(wait)
                        last_error = err
                        continue
                    raise err

                # A success with no body has nothing to decode; calling
                # resp.json() on it would raise a raw JSONDecodeError.
                if resp.status_code == 204 or not resp.content:
                    return {}
                return resp.json()

            except EvalGateError:
                # API-level errors are final here; retries were handled above.
                raise
            except httpx.TimeoutException as exc:
                last_error = EvalGateError(str(exc), "TIMEOUT", 408)
                if attempt < max_attempts:
                    await asyncio.sleep(2 ** (attempt - 1))
                    continue
                raise last_error from exc
            except httpx.HTTPError as exc:
                last_error = NetworkError(str(exc))
                if attempt < max_attempts:
                    await asyncio.sleep(2 ** (attempt - 1))
                    continue
                raise last_error from exc

        # Only reachable when max_attempts < 1 (the loop body never ran).
        raise last_error or NetworkError("Request failed after retries")

    async def get_quality(
        self,
        evaluation_id: int,
        *,
        baseline: str = "published",
    ) -> QualityScore:
        """Fetch the latest quality score for an evaluation."""
        data = await self._request(
            "GET",
            "/api/quality",
            params={
                "evaluationId": str(evaluation_id),
                "action": "latest",
                "baseline": baseline,
            },
        )
        return QualityScore.model_validate(data)

    async def get_run_export(
        self,
        evaluation_id: int,
        run_id: int,
    ) -> dict[str, Any]:
        """Fetch structured export data for a run."""
        return await self._request(
            "GET",
            f"/api/evaluations/{evaluation_id}/runs/{run_id}/export",
        )

    async def publish_share(
        self,
        evaluation_id: int,
        export_data: dict[str, Any],
        evaluation_run_id: int,
        *,
        expires_in_days: int | None = None,
    ) -> dict[str, Any]:
        """Publish a share link for a run.

        ``expiresInDays`` is included in the request body only when
        ``expires_in_days`` is not ``None``.
        """
        body: dict[str, Any] = {
            "exportData": export_data,
            "shareScope": "run",
            "evaluationRunId": evaluation_run_id,
        }
        if expires_in_days is not None:
            body["expiresInDays"] = expires_in_days
        return await self._request(
            "POST",
            f"/api/evaluations/{evaluation_id}/publish",
            json=body,
        )

    async def close(self) -> None:
        """Close the underlying HTTP client if one is open."""
        if self._http and not self._http.is_closed:
            await self._http.aclose()

    async def __aenter__(self) -> AIEvalClient:
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()