evalgate-sdk 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. evalgate_sdk/__init__.py +707 -0
  2. evalgate_sdk/_version.py +3 -0
  3. evalgate_sdk/assertions.py +1362 -0
  4. evalgate_sdk/auto.py +247 -0
  5. evalgate_sdk/batch.py +174 -0
  6. evalgate_sdk/cache.py +111 -0
  7. evalgate_sdk/ci_context.py +123 -0
  8. evalgate_sdk/cli/__init__.py +111 -0
  9. evalgate_sdk/cli/api.py +261 -0
  10. evalgate_sdk/cli/cli_constants.py +20 -0
  11. evalgate_sdk/cli/commands.py +1041 -0
  12. evalgate_sdk/cli/config.py +228 -0
  13. evalgate_sdk/cli/env.py +43 -0
  14. evalgate_sdk/cli/formatters/types.py +132 -0
  15. evalgate_sdk/cli/golden_commands.py +322 -0
  16. evalgate_sdk/cli/manifest.py +301 -0
  17. evalgate_sdk/cli/new_commands.py +435 -0
  18. evalgate_sdk/cli/policy_packs.py +103 -0
  19. evalgate_sdk/cli/profiles.py +12 -0
  20. evalgate_sdk/cli/regression_gate.py +312 -0
  21. evalgate_sdk/cli/render/__init__.py +1 -0
  22. evalgate_sdk/cli/render/snippet.py +18 -0
  23. evalgate_sdk/cli/render/sort.py +29 -0
  24. evalgate_sdk/cli/report/__init__.py +1 -0
  25. evalgate_sdk/cli/report/build_check_report.py +209 -0
  26. evalgate_sdk/cli/traces.py +186 -0
  27. evalgate_sdk/cli/workspace.py +63 -0
  28. evalgate_sdk/client.py +609 -0
  29. evalgate_sdk/cluster.py +359 -0
  30. evalgate_sdk/collector.py +161 -0
  31. evalgate_sdk/constants.py +6 -0
  32. evalgate_sdk/context.py +151 -0
  33. evalgate_sdk/errors.py +236 -0
  34. evalgate_sdk/export.py +238 -0
  35. evalgate_sdk/formatters/__init__.py +11 -0
  36. evalgate_sdk/formatters/github.py +51 -0
  37. evalgate_sdk/formatters/human.py +68 -0
  38. evalgate_sdk/formatters/json_fmt.py +11 -0
  39. evalgate_sdk/formatters/pr_comment.py +80 -0
  40. evalgate_sdk/golden.py +426 -0
  41. evalgate_sdk/integrations/__init__.py +1 -0
  42. evalgate_sdk/integrations/anthropic.py +99 -0
  43. evalgate_sdk/integrations/autogen.py +62 -0
  44. evalgate_sdk/integrations/crewai.py +61 -0
  45. evalgate_sdk/integrations/langchain.py +100 -0
  46. evalgate_sdk/integrations/openai.py +155 -0
  47. evalgate_sdk/integrations/openai_eval.py +221 -0
  48. evalgate_sdk/local.py +144 -0
  49. evalgate_sdk/logger.py +123 -0
  50. evalgate_sdk/matchers.py +62 -0
  51. evalgate_sdk/otel.py +256 -0
  52. evalgate_sdk/pagination.py +145 -0
  53. evalgate_sdk/py.typed +0 -0
  54. evalgate_sdk/pytest_plugin.py +96 -0
  55. evalgate_sdk/reason_codes.py +103 -0
  56. evalgate_sdk/regression.py +196 -0
  57. evalgate_sdk/replay_decision.py +115 -0
  58. evalgate_sdk/runtime/__init__.py +50 -0
  59. evalgate_sdk/runtime/adapters/__init__.py +1 -0
  60. evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
  61. evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
  62. evalgate_sdk/runtime/context.py +68 -0
  63. evalgate_sdk/runtime/eval.py +318 -0
  64. evalgate_sdk/runtime/execution_mode.py +170 -0
  65. evalgate_sdk/runtime/executor.py +92 -0
  66. evalgate_sdk/runtime/registry.py +125 -0
  67. evalgate_sdk/runtime/run_report.py +249 -0
  68. evalgate_sdk/runtime/types.py +143 -0
  69. evalgate_sdk/snapshot.py +219 -0
  70. evalgate_sdk/streaming.py +124 -0
  71. evalgate_sdk/synthesize.py +226 -0
  72. evalgate_sdk/testing.py +128 -0
  73. evalgate_sdk/types.py +666 -0
  74. evalgate_sdk/utils/__init__.py +1 -0
  75. evalgate_sdk/utils/input_hash.py +42 -0
  76. evalgate_sdk/workflows.py +264 -0
  77. evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
  78. evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
  79. evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
  80. evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,111 @@
1
+ """EvalGate CLI — command-line interface for EvalGate.
2
+
3
+ This module lazily initializes the CLI app only when typer is available.
4
+ Submodules like config.py, api.py, etc. can be imported without typer.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
# Lazy initialization: the Typer app is built on first use so that importing
# this package (and submodules such as config.py or api.py) never needs typer.
#
# NOTE: ``app`` is deliberately NOT bound at import time. A module-level
# ``__getattr__`` (PEP 562) only runs when normal attribute lookup fails, so a
# placeholder such as ``app = None`` would shadow the hook and make
# ``from evalgate_sdk.cli import app`` return ``None``. ``get_app()`` publishes
# the real app under the name ``app`` once it exists.


def _ensure_typer() -> None:
    """Check that typer is installed.

    Raises:
        SystemExit: If typer cannot be imported; the message names the extra
            to install.
    """
    try:
        import typer  # noqa: F401
    except ImportError as exc:
        raise SystemExit("CLI requires typer. Install with: pip install 'evalgate-sdk[cli]'") from exc


def _register_commands(cli_app) -> None:
    """Import the command modules and attach every command to *cli_app*.

    Registration order is kept exactly as listed because it is the order in
    which the commands are added to the app.
    """
    from evalgate_sdk.cli.commands import (
        baseline,
        check,
        ci,
        configure,
        diff,
        discover,
        doctor,
        explain,
        gate,
        impact_analysis,
        init,
        migrate,
        print_config,
        run,
        share,
        upgrade,
    )

    for cli_name, handler in (
        ("init", init),
        ("run", run),
        ("gate", gate),
        ("check", check),
        ("ci", ci),
        ("doctor", doctor),
        ("discover", discover),
        ("diff", diff),
        ("explain", explain),
        ("baseline", baseline),
        ("print-config", print_config),
        ("share", share),
        ("configure", configure),
        ("upgrade", upgrade),
        ("impact-analysis", impact_analysis),
        ("migrate", migrate),
    ):
        cli_app.command(cli_name)(handler)

    from evalgate_sdk.cli.new_commands import (
        compare,
        promote,
        replay,
        start,
        validate,
        watch,
    )

    for cli_name, handler in (
        ("start", start),
        ("watch", watch),
        ("compare", compare),
        ("validate", validate),
        ("promote", promote),
        ("replay", replay),
    ):
        cli_app.command(cli_name)(handler)

    from evalgate_sdk.cli.golden_commands import analyze, auto_app, cluster, label, replay_decision, synthesize

    for cli_name, handler in (
        ("cluster", cluster),
        ("analyze", analyze),
        ("label", label),
        ("synthesize", synthesize),
        ("replay-decision", replay_decision),
    ):
        cli_app.command(cli_name)(handler)

    cli_app.add_typer(auto_app, name="auto")


def get_app():
    """Return the CLI app, building and caching it on the first call.

    The finished app is stored in this module's globals under ``app``, so
    later calls (and plain ``evalgate_sdk.cli.app`` lookups) reuse it.

    Returns:
        The ``typer.Typer`` application with all commands registered.

    Raises:
        SystemExit: If typer is not installed.
        ImportError: If one of the command modules cannot be imported. In
            that case nothing is cached, so the next call retries instead of
            returning an app with only part of its commands.
    """
    module_globals = globals()
    cached = module_globals.get("app")
    if cached is not None:
        return cached

    _ensure_typer()

    import typer

    cli_app = typer.Typer(
        name="evalgate",
        help="EvalGate CLI — run evals, manage baselines, gate regressions.",
        no_args_is_help=True,
    )

    # Publish before importing the command modules, matching the original
    # ordering. NOTE(review): whether any command module looks up
    # ``evalgate_sdk.cli.app`` while loading is not visible from here.
    module_globals["app"] = cli_app
    try:
        _register_commands(cli_app)
    except BaseException:
        # Never leave a half-registered app cached for later callers.
        module_globals.pop("app", None)
        raise

    return cli_app


def __getattr__(name: str):
    """Resolve ``app`` lazily on first attribute access (PEP 562).

    Raises:
        AttributeError: For any attribute name other than ``app``.
    """
    if name == "app":
        return get_app()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
106
+
107
+
def main() -> None:
    """Console entry point: obtain the CLI app via ``get_app()`` and invoke it."""
    get_app()()
@@ -0,0 +1,261 @@
1
+ """API fetch helpers for evalgate CLI commands.
2
+
3
+ Captures x-request-id from response headers.
4
+ Sends X-EvalGate-SDK-Version and X-EvalGate-Spec-Version on all requests.
5
+
6
+ Port of ``cli/api.ts``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from typing import Any
13
+ from urllib.parse import quote
14
+
15
+ import httpx
16
+
17
+ from evalgate_sdk._version import SDK_VERSION, SPEC_VERSION
18
+
# Timeout handed to every ``httpx.AsyncClient`` created in this module.
DEFAULT_TIMEOUT = 30.0

# Version headers merged into the headers of every request built below.
API_HEADERS = {
    "X-EvalGate-SDK-Version": SDK_VERSION,
    "X-EvalGate-Spec-Version": SPEC_VERSION,
}
25
+
26
+
@dataclass
class QualityLatestData:
    """Record of latest-quality fields; every field is optional.

    Scalar fields default to ``None``; ``breakdown`` and ``flags`` default to
    a fresh empty dict / list per instance.
    NOTE(review): presumably mirrors the ``/api/quality?action=latest``
    payload — the helpers in this module return plain dicts, so confirm
    against the callers that build this type.
    """

    # Scores and sample size.
    score: float | None = None
    total: int | None = None
    evidence_level: str | None = None

    # Baseline comparison.
    baseline_score: float | None = None
    regression_delta: float | None = None
    baseline_missing: bool | None = None

    # Per-instance containers (never shared between instances).
    breakdown: dict[str, float] = field(default_factory=dict)
    flags: list[str] = field(default_factory=list)

    # Identifiers.
    evaluation_run_id: int | None = None
    evaluation_id: int | None = None

    # Latency and cost figures.
    avg_latency_ms: float | None = None
    cost_usd: float | None = None
    baseline_cost_usd: float | None = None
    baseline_run_id: int | None = None
43
+
44
+
@dataclass
class RunDetailsData:
    """Container for a list of per-result dicts; empty by default."""

    # A new list is created for each instance.
    results: list[dict[str, Any]] = field(default_factory=list)
48
+
49
+
@dataclass
class FetchOptions:
    """Settings consumed by ``fetch_api``: credentials, target and payload."""

    # Bearer token; ``fetch_api`` rejects an empty / whitespace-only value.
    api_key: str = ""
    # Prefix for the request URL; trailing slashes are stripped by ``fetch_api``.
    base_url: str = ""
    # HTTP verb passed to the HTTP client.
    method: str = "GET"
    # Optional JSON payload.
    body: dict[str, Any] | None = None
56
+
57
+
@dataclass
class ImportResult:
    """One test-case result record; all fields have defaults.

    NOTE(review): presumably the element shape of the ``results`` list sent by
    ``import_run_on_fail`` — that function accepts plain dicts, so confirm
    with the caller.
    """

    test_case_id: int = 0
    status: str = "passed"
    output: str = ""
    # Optional measurements and assertion details.
    latency_ms: float | None = None
    cost_usd: float | None = None
    assertions_json: dict[str, Any] | None = None
66
+
67
+
@dataclass
class PublishShareResult:
    """Share identifier, URL and scope; each defaults to an empty string."""

    share_id: str = ""
    share_url: str = ""
    share_scope: str = ""
73
+
74
+
def _require_api_key(api_key: str) -> str:
    """Return *api_key* with surrounding whitespace removed.

    Raises:
        ValueError: If the key is empty, ``None`` or whitespace only.
    """
    cleaned = api_key.strip() if api_key else ""
    if cleaned:
        return cleaned
    raise ValueError("API key is required but was empty. Set EVALGATE_API_KEY or pass --api-key.")
80
+
81
+
async def fetch_api(
    path: str,
    opts: FetchOptions,
) -> dict[str, Any]:
    """Send an authenticated request to an API endpoint and return its JSON.

    Args:
        path: Endpoint path, appended verbatim to ``opts.base_url`` once
            trailing slashes have been stripped from the base URL.
        opts: API key, base URL, HTTP method and optional JSON body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If ``opts.api_key`` is empty or whitespace only.
        RuntimeError: If the response status is 400 or above; the message
            carries the status code and the first 200 characters of the body.
    """
    key = _require_api_key(opts.api_key)
    headers = {
        **API_HEADERS,
        "Authorization": f"Bearer {key}",
    }
    url = f"{opts.base_url.rstrip('/')}{path}"

    request_kwargs: dict[str, Any] = {"headers": headers}
    # Compare against None rather than truthiness: an explicitly supplied
    # empty body ({}) is still a JSON payload and must be sent.
    if opts.body is not None:
        headers["Content-Type"] = "application/json"
        request_kwargs["json"] = opts.body

    async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
        resp = await client.request(opts.method, url, **request_kwargs)

        if resp.status_code >= 400:
            raise RuntimeError(f"API {resp.status_code}: {resp.text[:200]}")

        return resp.json()
105
+
106
+
async def fetch_quality_latest(
    base_url: str,
    api_key: str,
    evaluation_id: str,
    baseline: str,
) -> dict[str, Any]:
    """Fetch latest quality data for an evaluation.

    Args:
        base_url: API root; trailing slashes are stripped.
        api_key: Bearer token; must not be empty.
        evaluation_id: Sent percent-encoded as the ``evaluationId`` parameter.
        baseline: Sent percent-encoded as the ``baseline`` parameter.

    Returns:
        ``{"ok": True, "data": {...}, "request_id": ...}`` on success, or
        ``{"ok": False, "status": ..., "body": ..., "request_id": ...}`` on
        failure. ``status`` is ``0`` when the failure was an exception, not
        an HTTP error status.

    Raises:
        ValueError: If ``api_key`` is empty or whitespace only.
    """
    key = _require_api_key(api_key)
    headers = {**API_HEADERS, "Authorization": f"Bearer {key}"}
    url = (
        f"{base_url.rstrip('/')}/api/quality?evaluationId="
        f"{quote(str(evaluation_id), safe='')}&action=latest&baseline="
        f"{quote(str(baseline), safe='')}"
    )

    # Tracked outside the try block so that a failure occurring after the
    # response arrived (e.g. a body that is not valid JSON) still reports the
    # server's request id instead of discarding it.
    request_id: str | None = None
    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
            resp = await client.get(url, headers=headers)
            request_id = resp.headers.get("x-request-id")
            body = resp.text

            if resp.status_code >= 400:
                return {"ok": False, "status": resp.status_code, "body": body, "request_id": request_id}

            data = resp.json()
            return {"ok": True, "data": data, "request_id": request_id}
    except Exception as exc:
        # Best-effort contract: failures are reported as a result dict.
        return {"ok": False, "status": 0, "body": str(exc), "request_id": request_id}
139
+
140
+
async def fetch_run_details(
    base_url: str,
    api_key: str,
    evaluation_id: str,
    run_id: int,
) -> dict[str, Any]:
    """Fetch run details for an evaluation run.

    Args:
        base_url: API root; trailing slashes are stripped.
        api_key: Bearer token; must not be empty.
        evaluation_id: Percent-encoded into the URL path.
        run_id: Interpolated into the URL path as-is.

    Returns:
        ``{"ok": True, "data": ...}`` on success, or
        ``{"ok": False, "status": ..., "body": ...}`` on failure — the same
        failure shape as the other fetch helpers in this module. ``status``
        is ``0`` when the failure was an exception, not an HTTP status.

    Raises:
        ValueError: If ``api_key`` is empty or whitespace only.
    """
    key = _require_api_key(api_key)
    headers = {**API_HEADERS, "Authorization": f"Bearer {key}"}
    url = f"{base_url.rstrip('/')}/api/evaluations/{quote(str(evaluation_id), safe='')}/runs/{run_id}"

    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
            resp = await client.get(url, headers=headers)
            if resp.status_code >= 400:
                # Report why the call failed instead of a bare {"ok": False}.
                return {"ok": False, "status": resp.status_code, "body": resp.text}
            return {"ok": True, "data": resp.json()}
    except Exception as exc:
        # Still best-effort (no raise), but keep the reason for diagnostics.
        return {"ok": False, "status": 0, "body": str(exc)}
160
+
161
+
async def fetch_run_export(
    base_url: str,
    api_key: str,
    evaluation_id: str,
    run_id: int,
) -> dict[str, Any]:
    """Download the export payload of one evaluation run.

    Returns ``{"ok": True, "export_data": ...}`` on success, otherwise
    ``{"ok": False, "status": ..., "body": ...}`` where ``status`` is ``0``
    if an exception (rather than an HTTP error status) caused the failure.
    Raises ``ValueError`` when ``api_key`` is empty.
    """
    token = _require_api_key(api_key)
    request_headers = dict(API_HEADERS)
    request_headers["Authorization"] = f"Bearer {token}"
    root = base_url.rstrip("/")
    encoded_id = quote(str(evaluation_id), safe="")
    endpoint = f"{root}/api/evaluations/{encoded_id}/runs/{run_id}/export"

    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as session:
            response = await session.get(endpoint, headers=request_headers)
            raw_text = response.text
            if response.status_code < 400:
                return {"ok": True, "export_data": response.json()}
            return {"ok": False, "status": response.status_code, "body": raw_text}
    except Exception as err:
        return {"ok": False, "status": 0, "body": str(err)}
182
+
183
+
async def publish_share(
    base_url: str,
    api_key: str,
    evaluation_id: str,
    export_data: dict[str, Any],
    evaluation_run_id: int,
    expires_in_days: int | None = None,
) -> dict[str, Any]:
    """POST an export payload to the evaluation's publish endpoint.

    The request always uses share scope ``"run"``; ``expiresInDays`` is only
    included when ``expires_in_days`` is not ``None``.

    Returns ``{"ok": True, "data": ...}`` on success, otherwise
    ``{"ok": False, "status": ..., "body": ...}`` where ``status`` is ``0``
    if an exception caused the failure. Raises ``ValueError`` when
    ``api_key`` is empty.
    """
    token = _require_api_key(api_key)
    request_headers = dict(API_HEADERS)
    request_headers["Authorization"] = f"Bearer {token}"
    request_headers["Content-Type"] = "application/json"

    payload: dict[str, Any] = {
        "exportData": export_data,
        "shareScope": "run",
        "evaluationRunId": evaluation_run_id,
    }
    if expires_in_days is not None:
        payload["expiresInDays"] = expires_in_days

    root = base_url.rstrip("/")
    encoded_id = quote(str(evaluation_id), safe="")
    endpoint = f"{root}/api/evaluations/{encoded_id}/publish"

    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as session:
            response = await session.post(endpoint, headers=request_headers, json=payload)
            raw_text = response.text
            if response.status_code < 400:
                return {"ok": True, "data": response.json()}
            return {"ok": False, "status": response.status_code, "body": raw_text}
    except Exception as err:
        return {"ok": False, "status": 0, "body": str(err)}
218
+
219
+
async def import_run_on_fail(
    base_url: str,
    api_key: str,
    evaluation_id: str,
    results: list[dict[str, Any]],
    idempotency_key: str | None = None,
    ci: dict[str, Any] | None = None,
    import_client_version: str | None = None,
    check_report: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """POST run results to the evaluation's ``runs/import`` endpoint.

    The payload always carries ``environment`` ``"dev"``, the given
    ``results`` and an ``importClientVersion`` (``"evalgate-cli"`` when none
    is supplied). ``ci`` and ``checkReport`` are added only when truthy, and
    an ``Idempotency-Key`` header only when ``idempotency_key`` is truthy.

    Returns ``{"ok": True, "run_id": ...}`` (taken from the response's
    ``runId``) on success, otherwise ``{"ok": False, "status": ...,
    "body": ...}`` where ``status`` is ``0`` if an exception caused the
    failure. Raises ``ValueError`` when ``api_key`` is empty.
    """
    token = _require_api_key(api_key)
    request_headers: dict[str, str] = dict(API_HEADERS)
    request_headers["Authorization"] = f"Bearer {token}"
    request_headers["Content-Type"] = "application/json"
    if idempotency_key:
        request_headers["Idempotency-Key"] = idempotency_key

    payload: dict[str, Any] = {
        "environment": "dev",
        "results": results,
        "importClientVersion": import_client_version or "evalgate-cli",
    }
    # Optional sections, appended in a fixed order and only when non-empty.
    for wire_name, section in (("ci", ci), ("checkReport", check_report)):
        if section:
            payload[wire_name] = section

    root = base_url.rstrip("/")
    encoded_id = quote(str(evaluation_id), safe="")
    endpoint = f"{root}/api/evaluations/{encoded_id}/runs/import"

    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as session:
            response = await session.post(endpoint, headers=request_headers, json=payload)
            raw_text = response.text
            if response.status_code >= 400:
                return {"ok": False, "status": response.status_code, "body": raw_text}
            parsed = response.json()
            return {"ok": True, "run_id": parsed.get("runId")}
    except Exception as err:
        return {"ok": False, "status": 0, "body": str(err)}
@@ -0,0 +1,20 @@
1
+ """Standardized exit codes for evalgate check.
2
+
3
+ Port of ``cli/constants.ts``.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+
class EXIT:
    """Namespace of integer exit statuses used by CLI commands.

    Plain class attributes (not an enum): each name maps to one distinct
    ``int`` from 0 through 8.
    """

    # Success.
    PASS = 0

    # Non-zero statuses, one per outcome name.
    SCORE_BELOW = 1
    REGRESSION = 2
    POLICY_VIOLATION = 3
    API_ERROR = 4
    BAD_ARGS = 5
    LOW_N = 6
    WEAK_EVIDENCE = 7
    WARN_REGRESSION = 8