evalgate-sdk 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. evalgate_sdk/__init__.py +707 -0
  2. evalgate_sdk/_version.py +3 -0
  3. evalgate_sdk/assertions.py +1362 -0
  4. evalgate_sdk/auto.py +247 -0
  5. evalgate_sdk/batch.py +174 -0
  6. evalgate_sdk/cache.py +111 -0
  7. evalgate_sdk/ci_context.py +123 -0
  8. evalgate_sdk/cli/__init__.py +111 -0
  9. evalgate_sdk/cli/api.py +261 -0
  10. evalgate_sdk/cli/cli_constants.py +20 -0
  11. evalgate_sdk/cli/commands.py +1041 -0
  12. evalgate_sdk/cli/config.py +228 -0
  13. evalgate_sdk/cli/env.py +43 -0
  14. evalgate_sdk/cli/formatters/types.py +132 -0
  15. evalgate_sdk/cli/golden_commands.py +322 -0
  16. evalgate_sdk/cli/manifest.py +301 -0
  17. evalgate_sdk/cli/new_commands.py +435 -0
  18. evalgate_sdk/cli/policy_packs.py +103 -0
  19. evalgate_sdk/cli/profiles.py +12 -0
  20. evalgate_sdk/cli/regression_gate.py +312 -0
  21. evalgate_sdk/cli/render/__init__.py +1 -0
  22. evalgate_sdk/cli/render/snippet.py +18 -0
  23. evalgate_sdk/cli/render/sort.py +29 -0
  24. evalgate_sdk/cli/report/__init__.py +1 -0
  25. evalgate_sdk/cli/report/build_check_report.py +209 -0
  26. evalgate_sdk/cli/traces.py +186 -0
  27. evalgate_sdk/cli/workspace.py +63 -0
  28. evalgate_sdk/client.py +609 -0
  29. evalgate_sdk/cluster.py +359 -0
  30. evalgate_sdk/collector.py +161 -0
  31. evalgate_sdk/constants.py +6 -0
  32. evalgate_sdk/context.py +151 -0
  33. evalgate_sdk/errors.py +236 -0
  34. evalgate_sdk/export.py +238 -0
  35. evalgate_sdk/formatters/__init__.py +11 -0
  36. evalgate_sdk/formatters/github.py +51 -0
  37. evalgate_sdk/formatters/human.py +68 -0
  38. evalgate_sdk/formatters/json_fmt.py +11 -0
  39. evalgate_sdk/formatters/pr_comment.py +80 -0
  40. evalgate_sdk/golden.py +426 -0
  41. evalgate_sdk/integrations/__init__.py +1 -0
  42. evalgate_sdk/integrations/anthropic.py +99 -0
  43. evalgate_sdk/integrations/autogen.py +62 -0
  44. evalgate_sdk/integrations/crewai.py +61 -0
  45. evalgate_sdk/integrations/langchain.py +100 -0
  46. evalgate_sdk/integrations/openai.py +155 -0
  47. evalgate_sdk/integrations/openai_eval.py +221 -0
  48. evalgate_sdk/local.py +144 -0
  49. evalgate_sdk/logger.py +123 -0
  50. evalgate_sdk/matchers.py +62 -0
  51. evalgate_sdk/otel.py +256 -0
  52. evalgate_sdk/pagination.py +145 -0
  53. evalgate_sdk/py.typed +0 -0
  54. evalgate_sdk/pytest_plugin.py +96 -0
  55. evalgate_sdk/reason_codes.py +103 -0
  56. evalgate_sdk/regression.py +196 -0
  57. evalgate_sdk/replay_decision.py +115 -0
  58. evalgate_sdk/runtime/__init__.py +50 -0
  59. evalgate_sdk/runtime/adapters/__init__.py +1 -0
  60. evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
  61. evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
  62. evalgate_sdk/runtime/context.py +68 -0
  63. evalgate_sdk/runtime/eval.py +318 -0
  64. evalgate_sdk/runtime/execution_mode.py +170 -0
  65. evalgate_sdk/runtime/executor.py +92 -0
  66. evalgate_sdk/runtime/registry.py +125 -0
  67. evalgate_sdk/runtime/run_report.py +249 -0
  68. evalgate_sdk/runtime/types.py +143 -0
  69. evalgate_sdk/snapshot.py +219 -0
  70. evalgate_sdk/streaming.py +124 -0
  71. evalgate_sdk/synthesize.py +226 -0
  72. evalgate_sdk/testing.py +128 -0
  73. evalgate_sdk/types.py +666 -0
  74. evalgate_sdk/utils/__init__.py +1 -0
  75. evalgate_sdk/utils/input_hash.py +42 -0
  76. evalgate_sdk/workflows.py +264 -0
  77. evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
  78. evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
  79. evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
  80. evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,186 @@
1
+ """Structured trace writer for evalgate runs.
2
+
3
+ Auto-writes structured JSON to .evalgate/traces/ on every define_eval result.
4
+ Each trace captures: spec identity, timing, assertions, score, and metadata.
5
+
6
+ Port of ``cli/traces.ts``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import os
13
+ import platform
14
+ import sys
15
+ import time
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime, timezone
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+
22
@dataclass
class SpecTrace:
    """Trace record describing the outcome of a single spec.

    Attribute names are snake_case; :meth:`to_dict` converts them to the
    camelCase keys used in the serialised trace.
    """

    # Version of the trace record layout.
    schema_version: int = 1
    # Capture time as an integer (build_run_trace fills in epoch milliseconds).
    timestamp: int = 0
    # ISO-8601 rendering of the capture time.
    timestamp_iso: str = ""
    # Identifier of the run this spec belongs to.
    run_id: str = ""
    # Spec identity (build_run_trace stores "id", "name" and "filePath").
    spec: dict[str, str] = field(default_factory=dict)
    # Execution outcome (build_run_trace stores status, score, duration, error).
    execution: dict[str, Any] = field(default_factory=dict)
    # Optional git information; left out of the serialised form when empty.
    git: dict[str, str] | None = None
    # Description of the environment the spec ran in.
    env: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a dict with camelCase keys.

        The ``"git"`` key is appended only when ``self.git`` is truthy, so
        both ``None`` and an empty dict are omitted.
        """
        payload: dict[str, Any] = dict(
            schemaVersion=self.schema_version,
            timestamp=self.timestamp,
            timestampISO=self.timestamp_iso,
            runId=self.run_id,
            spec=self.spec,
            execution=self.execution,
            env=self.env,
        )
        if not self.git:
            return payload
        payload["git"] = self.git
        return payload
48
+
49
+
50
@dataclass
class RunTrace:
    """Run-level trace: run info, aggregate summary, latency and per-spec traces."""

    # Version of the trace layout.
    schema_version: int = 1
    # Run identity and timing information.
    run: dict[str, Any] = field(default_factory=dict)
    # Aggregate pass/fail style counters for the run.
    summary: dict[str, Any] = field(default_factory=dict)
    # Latency statistics for the run.
    latency: dict[str, Any] = field(default_factory=dict)
    # One SpecTrace per spec result.
    specs: list[SpecTrace] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the run trace as a dict with camelCase keys.

        Every entry of ``specs`` is converted through its own ``to_dict()``.
        """
        serialised_specs = [spec_trace.to_dict() for spec_trace in self.specs]
        return dict(
            schemaVersion=self.schema_version,
            run=self.run,
            summary=self.summary,
            latency=self.latency,
            specs=serialised_specs,
        )
68
+
69
+
70
def calculate_percentiles(durations: list[float]) -> dict[str, float]:
    """Summarise a list of durations as min / max / mean / p50 / p95 / p99.

    An empty list yields zeros for every statistic. Percentiles are read
    from the ascending-sorted values at index ``int(n * fraction)``, capped
    at the last element; the mean is rounded to the nearest integer.
    """
    if len(durations) == 0:
        return {"min": 0, "max": 0, "mean": 0, "p50": 0, "p95": 0, "p99": 0}

    ordered = sorted(durations)
    count = len(ordered)
    last = count - 1

    def value_at(fraction: float) -> float:
        # Cap at the final index so high percentiles never overrun the list.
        return ordered[min(int(count * fraction), last)]

    stats = {
        "min": ordered[0],
        "max": ordered[last],
        "mean": round(sum(ordered) / count),
    }
    stats["p50"] = value_at(0.5)
    stats["p95"] = value_at(0.95)
    stats["p99"] = value_at(0.99)
    return stats
87
+
88
+
89
def build_run_trace(
    result: dict[str, Any],
    git_info: dict[str, str] | None = None,
) -> RunTrace:
    """Build a RunTrace from a run result dict.

    Args:
        result: Run result. Reads ``run_id``/``runId``, ``results``,
            ``metadata`` and ``summary``; each entry of ``results`` may carry
            its outcome under a nested ``result`` key or directly on itself.
            Both snake_case and camelCase spellings are accepted where the
            code looks up two keys.
        git_info: Optional git information attached to every spec trace.

    Returns:
        A RunTrace with one SpecTrace per entry in ``results`` and latency
        percentiles computed over the durations of non-skipped specs.
    """
    now = int(time.time() * 1000)
    # Derive the ISO string from the same instant as ``now`` so that
    # ``timestamp`` and ``timestampISO`` agree and are identical for every
    # spec of the run (previously the clock was re-read per spec).
    now_iso = datetime.fromtimestamp(now / 1000, tz=timezone.utc).isoformat()
    is_ci = bool(os.environ.get("CI") or os.environ.get("GITHUB_ACTIONS") or os.environ.get("GITLAB_CI"))
    # Environment info does not change between specs; compute it once.
    env_info: dict[str, Any] = {
        "pythonVersion": sys.version.split()[0],
        "platform": platform.system().lower(),
        "ci": is_ci,
    }

    run_id = result.get("run_id", result.get("runId", "unknown"))
    # ``or`` fallbacks tolerate keys that are present but set to None.
    results_list = result.get("results") or []
    metadata = result.get("metadata") or {}
    summary = result.get("summary") or {}

    spec_traces: list[SpecTrace] = []
    for spec in results_list:
        spec_result = spec.get("result")
        if spec_result is None:
            # No nested outcome: the entry itself carries status/score/etc.
            spec_result = spec
        spec_traces.append(
            SpecTrace(
                timestamp=now,
                timestamp_iso=now_iso,
                run_id=run_id,
                spec={
                    "id": spec.get("spec_id", spec.get("specId", "")),
                    "name": spec.get("name", ""),
                    "filePath": spec.get("file_path", spec.get("filePath", "")),
                },
                execution={
                    "status": spec_result.get("status", "unknown"),
                    "score": spec_result.get("score"),
                    "duration": spec_result.get("duration", spec_result.get("duration_ms", 0)),
                    "error": spec_result.get("error"),
                },
                git=git_info,
                # Each trace gets its own copy so later mutation of one
                # record cannot leak into the others.
                env=dict(env_info),
            )
        )

    # Skipped specs are excluded from latency, as are missing/non-numeric
    # durations, which would otherwise break sorting and summing.
    durations = [
        s.execution["duration"]
        for s in spec_traces
        if s.execution.get("status") != "skipped"
        and isinstance(s.execution.get("duration"), (int, float))
    ]
    latency = calculate_percentiles(durations)

    return RunTrace(
        run={
            "id": run_id,
            "startedAt": metadata.get("started_at", metadata.get("startedAt", now)),
            "completedAt": metadata.get("completed_at", metadata.get("completedAt", now)),
            "duration": metadata.get("duration", 0),
            "mode": metadata.get("mode", "spec"),
        },
        summary={
            "total": len(results_list),
            "passed": summary.get("passed", 0),
            "failed": summary.get("failed", 0),
            "skipped": summary.get("skipped", 0),
            "passRate": summary.get("pass_rate", summary.get("passRate", 0)),
        },
        latency=latency,
        specs=spec_traces,
    )
151
+
152
+
153
async def write_traces(
    result: dict[str, Any],
    project_root: str | None = None,
    git_info: dict[str, str] | None = None,
) -> str:
    """Write structured trace files to .evalgate/traces/.

    Two files with identical content are written: ``<run_id>.trace.json``
    and ``latest.trace.json``. The directory is created when missing.

    Args:
        result: Run result dict, passed on to ``build_run_trace``.
        project_root: Directory that contains ``.evalgate``; defaults to the
            current working directory.
        git_info: Optional git information, passed on to ``build_run_trace``.

    Returns:
        Path of the run-specific trace file.

    Note:
        The coroutine contains no ``await``; the file writes are synchronous.
    """
    root = project_root or os.getcwd()
    traces_dir = os.path.join(root, ".evalgate", "traces")
    os.makedirs(traces_dir, exist_ok=True)

    run_trace = build_run_trace(result, git_info)
    run_id = run_trace.run.get("id", "unknown")

    # Serialise once; both files receive exactly the same payload.
    payload = json.dumps(run_trace.to_dict(), indent=2)

    trace_file = os.path.join(traces_dir, f"{run_id}.trace.json")
    latest_file = os.path.join(traces_dir, "latest.trace.json")
    for target in (trace_file, latest_file):
        Path(target).write_text(payload, encoding="utf-8")

    return trace_file
173
+
174
+
175
def format_latency_table(latency: dict[str, Any]) -> str:
    """Render latency statistics as a newline-joined, human-readable block.

    A header line is followed by one line per statistic, in the order
    min, p50, p95, p99, max, mean. Missing keys are shown as ``0``.
    """
    display_order = ("min", "p50", "p95", "p99", "max", "mean")
    rows = [f" {key}: {latency.get(key, 0)}ms" for key in display_order]
    return "\n".join(["⏱️ Latency Percentiles:", *rows])
@@ -0,0 +1,63 @@
1
+ """Centralized .evalgate workspace resolution.
2
+
3
+ Provides unified workspace path resolution for all EvalGate CLI commands.
4
+ Prefers .evalgate/; falls back to .evalai/ for backward compatibility.
5
+
6
+ Port of ``cli/workspace.ts``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ import warnings
13
+ from dataclasses import dataclass
14
+
15
+ _LEGACY_WARNING_SHOWN = False
16
+
17
+
18
@dataclass
class EvalWorkspace:
    """Bundle of resolved filesystem paths for an EvalGate workspace.

    All fields are plain strings; resolve_eval_workspace() fills them in.
    """

    # Project root the workspace was resolved against.
    root: str
    # Workspace directory (``.evalgate`` or the legacy ``.evalai``).
    eval_dir: str
    # ``runs`` directory inside the workspace directory.
    runs_dir: str
    # ``manifest.json`` inside the workspace directory.
    manifest_path: str
    # ``last-run.json`` inside the workspace directory.
    last_run_path: str
    # ``index.json`` inside the runs directory.
    index_path: str
    # ``baseline-run.json`` inside the workspace directory.
    baseline_path: str
29
+
30
+
31
def resolve_eval_workspace(project_root: str | None = None) -> EvalWorkspace:
    """Work out where the EvalGate workspace lives and return its paths.

    ``.evalgate/`` under the project root is used unless only the legacy
    ``.evalai/`` directory exists, in which case the legacy directory is
    used and a DeprecationWarning is issued (guarded by a module-level flag
    so it is raised at most once per process).

    Args:
        project_root: Project directory; defaults to the current working
            directory when omitted or empty.

    Returns:
        An EvalWorkspace with all paths derived from the chosen directory.
    """
    global _LEGACY_WARNING_SHOWN

    base = project_root or os.getcwd()
    preferred_dir = os.path.join(base, ".evalgate")
    legacy_dir = os.path.join(base, ".evalai")

    # Legacy layout applies only when .evalai exists and .evalgate does not.
    if os.path.isdir(legacy_dir) and not os.path.isdir(preferred_dir):
        workspace_dir = legacy_dir
        if not _LEGACY_WARNING_SHOWN:
            warnings.warn(
                "[EvalGate] Deprecation: .evalai/ is deprecated. Migrate to .evalgate/ (e.g. rename .evalai to .evalgate).",
                DeprecationWarning,
                stacklevel=2,
            )
            _LEGACY_WARNING_SHOWN = True
    else:
        workspace_dir = preferred_dir

    def inside(name: str) -> str:
        # Path of a file that sits directly in the workspace directory.
        return os.path.join(workspace_dir, name)

    runs_path = inside("runs")

    return EvalWorkspace(
        root=base,
        eval_dir=workspace_dir,
        runs_dir=runs_path,
        manifest_path=inside("manifest.json"),
        last_run_path=inside("last-run.json"),
        index_path=os.path.join(runs_path, "index.json"),
        baseline_path=inside("baseline-run.json"),
    )