evalgate-sdk 3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalgate_sdk/__init__.py +707 -0
- evalgate_sdk/_version.py +3 -0
- evalgate_sdk/assertions.py +1362 -0
- evalgate_sdk/auto.py +247 -0
- evalgate_sdk/batch.py +174 -0
- evalgate_sdk/cache.py +111 -0
- evalgate_sdk/ci_context.py +123 -0
- evalgate_sdk/cli/__init__.py +111 -0
- evalgate_sdk/cli/api.py +261 -0
- evalgate_sdk/cli/cli_constants.py +20 -0
- evalgate_sdk/cli/commands.py +1041 -0
- evalgate_sdk/cli/config.py +228 -0
- evalgate_sdk/cli/env.py +43 -0
- evalgate_sdk/cli/formatters/types.py +132 -0
- evalgate_sdk/cli/golden_commands.py +322 -0
- evalgate_sdk/cli/manifest.py +301 -0
- evalgate_sdk/cli/new_commands.py +435 -0
- evalgate_sdk/cli/policy_packs.py +103 -0
- evalgate_sdk/cli/profiles.py +12 -0
- evalgate_sdk/cli/regression_gate.py +312 -0
- evalgate_sdk/cli/render/__init__.py +1 -0
- evalgate_sdk/cli/render/snippet.py +18 -0
- evalgate_sdk/cli/render/sort.py +29 -0
- evalgate_sdk/cli/report/__init__.py +1 -0
- evalgate_sdk/cli/report/build_check_report.py +209 -0
- evalgate_sdk/cli/traces.py +186 -0
- evalgate_sdk/cli/workspace.py +63 -0
- evalgate_sdk/client.py +609 -0
- evalgate_sdk/cluster.py +359 -0
- evalgate_sdk/collector.py +161 -0
- evalgate_sdk/constants.py +6 -0
- evalgate_sdk/context.py +151 -0
- evalgate_sdk/errors.py +236 -0
- evalgate_sdk/export.py +238 -0
- evalgate_sdk/formatters/__init__.py +11 -0
- evalgate_sdk/formatters/github.py +51 -0
- evalgate_sdk/formatters/human.py +68 -0
- evalgate_sdk/formatters/json_fmt.py +11 -0
- evalgate_sdk/formatters/pr_comment.py +80 -0
- evalgate_sdk/golden.py +426 -0
- evalgate_sdk/integrations/__init__.py +1 -0
- evalgate_sdk/integrations/anthropic.py +99 -0
- evalgate_sdk/integrations/autogen.py +62 -0
- evalgate_sdk/integrations/crewai.py +61 -0
- evalgate_sdk/integrations/langchain.py +100 -0
- evalgate_sdk/integrations/openai.py +155 -0
- evalgate_sdk/integrations/openai_eval.py +221 -0
- evalgate_sdk/local.py +144 -0
- evalgate_sdk/logger.py +123 -0
- evalgate_sdk/matchers.py +62 -0
- evalgate_sdk/otel.py +256 -0
- evalgate_sdk/pagination.py +145 -0
- evalgate_sdk/py.typed +0 -0
- evalgate_sdk/pytest_plugin.py +96 -0
- evalgate_sdk/reason_codes.py +103 -0
- evalgate_sdk/regression.py +196 -0
- evalgate_sdk/replay_decision.py +115 -0
- evalgate_sdk/runtime/__init__.py +50 -0
- evalgate_sdk/runtime/adapters/__init__.py +1 -0
- evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
- evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
- evalgate_sdk/runtime/context.py +68 -0
- evalgate_sdk/runtime/eval.py +318 -0
- evalgate_sdk/runtime/execution_mode.py +170 -0
- evalgate_sdk/runtime/executor.py +92 -0
- evalgate_sdk/runtime/registry.py +125 -0
- evalgate_sdk/runtime/run_report.py +249 -0
- evalgate_sdk/runtime/types.py +143 -0
- evalgate_sdk/snapshot.py +219 -0
- evalgate_sdk/streaming.py +124 -0
- evalgate_sdk/synthesize.py +226 -0
- evalgate_sdk/testing.py +128 -0
- evalgate_sdk/types.py +666 -0
- evalgate_sdk/utils/__init__.py +1 -0
- evalgate_sdk/utils/input_hash.py +42 -0
- evalgate_sdk/workflows.py +264 -0
- evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
- evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
- evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
- evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
evalgate_sdk/auto.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Literal
|
|
9
|
+
|
|
10
|
+
from evalgate_sdk.golden import extract_run_metrics
|
|
11
|
+
from evalgate_sdk.replay_decision import NormalizedBudgetConfig, ReplayDecision, evaluate_replay_outcome
|
|
12
|
+
|
|
13
|
+
# Default JSON-lines file that append_auto_history() and run_auto_daemon() append to.
DEFAULT_AUTO_HISTORY_PATH = str(Path(".evalgate") / "auto" / "history.jsonl")
# Default report file; used by write_auto_report() when no path is given.
DEFAULT_AUTO_REPORT_PATH = str(Path(".evalgate") / "auto" / "latest-report.json")

# Output formats accepted by AutoOptions.format.
AutoFormat = Literal["human", "json"]
# "evaluate" is chosen by build_auto_report() when both run paths are set and
# dry_run is off; otherwise the report is "plan" only.
AutoExecutionMode = Literal["plan", "evaluate"]
# Verdicts that decide_auto_experiment() can assign to an iteration.
AutoDecision = Literal["keep", "discard", "investigate"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(slots=True)
|
|
22
|
+
class AutoPlanStep:
|
|
23
|
+
title: str
|
|
24
|
+
detail: str
|
|
25
|
+
|
|
26
|
+
def to_dict(self) -> dict[str, str]:
|
|
27
|
+
return {
|
|
28
|
+
"title": self.title,
|
|
29
|
+
"detail": self.detail,
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(slots=True)
class AutoOptions:
    """Caller-supplied settings for building an auto plan and report.

    Only ``objective`` is required; every other field has a default.
    """

    # Failure mode being targeted; interpolated into the plan text.
    objective: str
    # Optional hypothesis; build_auto_plan() falls back to
    # "Reduce <objective> failures" when this is unset.
    hypothesis: str | None = None
    # Prompt file named as the mutation target in the plan, if any.
    prompt_path: str | None = None
    # JSON run artifacts to compare; evaluation only happens when both are set.
    baseline_run_path: str | None = None
    candidate_run_path: str | None = None
    # Budget amount; interpreted as a trace count or a cost according to budget_mode.
    budget: int = 1
    budget_mode: Literal["traces", "cost"] = "traces"
    # Copied verbatim into the report.
    autonomous: bool = False
    # When True, build_auto_report() stays in "plan" mode even if both run paths are set.
    dry_run: bool = False
    format: AutoFormat = "human"
    report_path: str = DEFAULT_AUTO_REPORT_PATH
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
|
|
49
|
+
class AutoDiffSnapshot:
|
|
50
|
+
pass_rate_delta_ratio: float
|
|
51
|
+
corrected_pass_rate_delta_ratio: float | None
|
|
52
|
+
pass_rate_basis: Literal["corrected", "raw"]
|
|
53
|
+
regressions: int = 0
|
|
54
|
+
improvements: int = 0
|
|
55
|
+
|
|
56
|
+
def to_dict(self) -> dict[str, Any]:
|
|
57
|
+
return {
|
|
58
|
+
"passRateDeltaRatio": self.pass_rate_delta_ratio,
|
|
59
|
+
"correctedPassRateDeltaRatio": self.corrected_pass_rate_delta_ratio,
|
|
60
|
+
"passRateBasis": self.pass_rate_basis,
|
|
61
|
+
"regressions": self.regressions,
|
|
62
|
+
"improvements": self.improvements,
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(slots=True)
|
|
67
|
+
class AutoIterationResult:
|
|
68
|
+
decision: AutoDecision
|
|
69
|
+
reason: str
|
|
70
|
+
replay_decision: ReplayDecision | None = None
|
|
71
|
+
diff: AutoDiffSnapshot | None = None
|
|
72
|
+
|
|
73
|
+
def to_dict(self) -> dict[str, Any]:
|
|
74
|
+
return {
|
|
75
|
+
"decision": self.decision,
|
|
76
|
+
"reason": self.reason,
|
|
77
|
+
"replayDecision": self.replay_decision.to_dict() if self.replay_decision else None,
|
|
78
|
+
"diff": self.diff.to_dict() if self.diff else None,
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass(slots=True)
|
|
83
|
+
class AutoReport:
|
|
84
|
+
objective: str
|
|
85
|
+
execution_mode: AutoExecutionMode
|
|
86
|
+
dry_run: bool
|
|
87
|
+
autonomous: bool
|
|
88
|
+
prompt_path: str | None
|
|
89
|
+
baseline_run_path: str | None
|
|
90
|
+
candidate_run_path: str | None
|
|
91
|
+
iteration_budget: int
|
|
92
|
+
generated_at: str
|
|
93
|
+
plan: list[AutoPlanStep] = field(default_factory=list)
|
|
94
|
+
iteration_result: AutoIterationResult | None = None
|
|
95
|
+
|
|
96
|
+
def to_dict(self) -> dict[str, Any]:
|
|
97
|
+
return {
|
|
98
|
+
"objective": self.objective,
|
|
99
|
+
"executionMode": self.execution_mode,
|
|
100
|
+
"dryRun": self.dry_run,
|
|
101
|
+
"autonomous": self.autonomous,
|
|
102
|
+
"promptPath": self.prompt_path,
|
|
103
|
+
"baselineRunPath": self.baseline_run_path,
|
|
104
|
+
"candidateRunPath": self.candidate_run_path,
|
|
105
|
+
"iterationBudget": self.iteration_budget,
|
|
106
|
+
"generatedAt": self.generated_at,
|
|
107
|
+
"plan": [step.to_dict() for step in self.plan],
|
|
108
|
+
"iterationResult": self.iteration_result.to_dict() if self.iteration_result else None,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def build_auto_plan(options: AutoOptions) -> list[AutoPlanStep]:
    """Turn *options* into the fixed five-step plan.

    The steps are, in order: Objective, Hypothesis, Mutation target, Budget
    and Evaluation. Missing hypothesis / prompt path values are replaced by
    generic placeholder text, and the Evaluation wording depends on whether
    both run artifacts were supplied.
    """
    if options.baseline_run_path and options.candidate_run_path:
        evaluation_detail = "Compare candidate run to baseline and keep only non-regressing improvements"
    else:
        evaluation_detail = "Generate bounded plan only until baseline/candidate artifacts are provided"

    rows = (
        ("Objective", f"Target failure mode: {options.objective}"),
        ("Hypothesis", options.hypothesis or f"Reduce {options.objective} failures"),
        ("Mutation target", options.prompt_path or "declared prompt target"),
        ("Budget", f"{options.budget} {options.budget_mode} per iteration"),
        ("Evaluation", evaluation_detail),
    )
    return [AutoPlanStep(title=heading, detail=text) for heading, text in rows]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _read_json(file_path: str) -> dict[str, Any]:
    """Load *file_path* as UTF-8 JSON and insist on a top-level object.

    Raises:
        ValueError: if the decoded document is not a JSON object.
    """
    raw_text = Path(file_path).read_text(encoding="utf-8")
    payload = json.loads(raw_text)
    if isinstance(payload, dict):
        return payload
    raise ValueError("Auto run input must be a JSON object")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _build_diff_snapshot(previous_run: dict[str, Any], candidate_run: dict[str, Any], decision: ReplayDecision) -> AutoDiffSnapshot:
    """Summarize how the candidate's pass rate moved relative to the baseline.

    The corrected delta is only computed when both runs expose a corrected
    pass rate. ``regressions`` / ``improvements`` are coarse 0/1 flags derived
    solely from whether the replay action is ``"keep"``.
    """
    before = extract_run_metrics(previous_run)
    after = extract_run_metrics(candidate_run)

    if before.corrected_pass_rate_ratio is None or after.corrected_pass_rate_ratio is None:
        corrected_delta = None
    else:
        corrected_delta = after.corrected_pass_rate_ratio - before.corrected_pass_rate_ratio

    raw_delta = after.pass_rate_ratio - before.pass_rate_ratio
    basis = decision.comparison_basis
    kept = decision.action == "keep"
    return AutoDiffSnapshot(
        pass_rate_delta_ratio=raw_delta,
        corrected_pass_rate_delta_ratio=corrected_delta,
        pass_rate_basis=basis,
        regressions=0 if kept else 1,
        improvements=1 if kept else 0,
    )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def decide_auto_experiment(options: AutoOptions) -> AutoIterationResult | None:
    """Compare the candidate run to the baseline and produce a verdict.

    Returns ``None`` when either run path is missing. Otherwise both JSON
    artifacts are loaded, the replay outcome is evaluated under the configured
    budget, and the outcome is mapped to a decision: a ``"keep"`` action is
    kept, a ``"budget_exceeded"`` reason becomes ``"investigate"``, and
    anything else is discarded.
    """
    baseline_path = options.baseline_run_path
    candidate_path = options.candidate_run_path
    if not (baseline_path and candidate_path):
        return None

    previous_run = _read_json(baseline_path)
    candidate_run = _read_json(candidate_path)

    # Only the limit that matches the budget mode is populated.
    trace_cap = options.budget if options.budget_mode == "traces" else None
    cost_cap = float(options.budget) if options.budget_mode == "cost" else None
    budget_config = NormalizedBudgetConfig(
        mode=options.budget_mode,
        max_traces=trace_cap,
        max_cost_usd=cost_cap,
    )
    replay = evaluate_replay_outcome(previous_run, candidate_run, budget_config)

    verdict: AutoDecision = "discard"
    if replay.action == "keep":
        verdict = "keep"
    elif replay.reason == "budget_exceeded":
        verdict = "investigate"

    snapshot = _build_diff_snapshot(previous_run, candidate_run, replay)
    return AutoIterationResult(
        decision=verdict,
        reason=replay.reason,
        replay_decision=replay,
        diff=snapshot,
    )
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def build_auto_report(options: AutoOptions) -> AutoReport:
    """Assemble the report for one auto run.

    The run is in ``"evaluate"`` mode only when both run paths are set and
    ``dry_run`` is off; otherwise it is ``"plan"`` and no iteration result is
    produced. ``generated_at`` is a timezone-aware UTC ISO-8601 timestamp.
    """
    has_both_runs = options.baseline_run_path and options.candidate_run_path
    execution_mode: AutoExecutionMode = "evaluate" if has_both_runs and not options.dry_run else "plan"

    # Timestamp first, so it is taken before any run artifacts are read.
    timestamp = datetime.now(timezone.utc).isoformat()
    steps = build_auto_plan(options)
    outcome = None
    if execution_mode == "evaluate":
        outcome = decide_auto_experiment(options)

    return AutoReport(
        objective=options.objective,
        execution_mode=execution_mode,
        dry_run=options.dry_run,
        autonomous=options.autonomous,
        prompt_path=options.prompt_path,
        baseline_run_path=options.baseline_run_path,
        candidate_run_path=options.candidate_run_path,
        iteration_budget=options.budget,
        generated_at=timestamp,
        plan=steps,
        iteration_result=outcome,
    )
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def format_auto_human(report: AutoReport) -> str:
    """Render *report* as newline-joined, human-readable text.

    The output lists the objective, mode and budget, the optional prompt
    path, the numbered plan and, when an iteration result exists, the
    decision, its reason and the pass-rate delta.

    The delta shown matches the basis printed next to it: when the basis is
    ``"corrected"`` and a corrected delta is available, that value is shown.
    Otherwise the raw delta is shown. Previously the raw delta was always
    printed, even under a "corrected" label.
    """
    lines = [
        "Auto phase",
        f"Objective: {report.objective}",
        f"Mode: {report.execution_mode}",
        f"Budget: {report.iteration_budget}",
    ]
    if report.prompt_path:
        lines.append(f"Prompt: {report.prompt_path}")

    lines.append("Plan:")
    lines.extend(f"{index}. {step.title} — {step.detail}" for index, step in enumerate(report.plan, start=1))

    outcome = report.iteration_result
    if outcome:
        lines.extend(["", f"Decision: {outcome.decision}", f"Reason: {outcome.reason}"])
        diff = outcome.diff
        if diff:
            delta = diff.pass_rate_delta_ratio
            # Keep the number consistent with the label that follows it.
            if diff.pass_rate_basis == "corrected" and diff.corrected_pass_rate_delta_ratio is not None:
                delta = diff.corrected_pass_rate_delta_ratio
            lines.append(f"Pass-rate delta: {delta:+.4f} ({diff.pass_rate_basis})")
    return "\n".join(lines)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def append_auto_history(report: AutoReport, history_path: str = DEFAULT_AUTO_HISTORY_PATH) -> None:
    """Append *report* to the history file as one compact JSON line.

    Missing parent directories are created first.
    """
    target = Path(history_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("a", encoding="utf-8") as handle:
        record = json.dumps(report.to_dict(), separators=(",", ":"))
        handle.write(f"{record}\n")
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def write_auto_report(report: AutoReport, report_path: str | None = None) -> None:
    """Write *report* as indented JSON, replacing any existing file.

    A falsy *report_path* selects ``DEFAULT_AUTO_REPORT_PATH``. Missing parent
    directories are created first.
    """
    destination = Path(report_path) if report_path else Path(DEFAULT_AUTO_REPORT_PATH)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report.to_dict(), indent=2)
    destination.write_text(serialized, encoding="utf-8")
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def run_auto_daemon(
    options: AutoOptions,
    *,
    cycles: int,
    interval_ms: int = 0,
    history_path: str = DEFAULT_AUTO_HISTORY_PATH,
) -> list[AutoReport]:
    """Build and record a report *cycles* times, pausing between cycles.

    Each report is appended to *history_path* and collected into the returned
    list. A negative *cycles* behaves like zero. When *interval_ms* is
    positive the function sleeps that long between cycles, but not after the
    final one.
    """
    collected: list[AutoReport] = []
    last_index = cycles - 1
    for position in range(max(0, cycles)):
        report = build_auto_report(options)
        append_auto_history(report, history_path=history_path)
        collected.append(report)
        if position < last_index and interval_ms > 0:
            time.sleep(interval_ms / 1000.0)
    return collected
|
evalgate_sdk/batch.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Request batching and concurrent processing utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import inspect
|
|
7
|
+
import uuid
|
|
8
|
+
from collections.abc import Callable, Coroutine
|
|
9
|
+
from typing import Any, TypeVar
|
|
10
|
+
|
|
11
|
+
# Generic placeholders used by batch_process(): T is the item type, R the result type.
T = TypeVar("T")
R = TypeVar("R")

# Endpoint prefixes that can_batch() treats as eligible for batching.
_BATCHABLE_ENDPOINTS = {"/api/traces", "/api/evaluations", "/api/annotations"}
|
|
15
|
+
|
|
16
|
+
def _is_async_callable(processor: Any) -> bool:
    """Tell whether *processor* is a coroutine function or has an async ``__call__``."""
    if inspect.iscoroutinefunction(processor):
        return True
    # Callable objects: inspect their __call__ attribute instead.
    call_method = getattr(processor, "__call__", None)
    return inspect.iscoroutinefunction(call_method)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def can_batch(method: str, endpoint: str) -> bool:
    """Report whether a request may be batched: a POST to a batchable endpoint prefix."""
    if method.upper() != "POST":
        return False
    for prefix in _BATCHABLE_ENDPOINTS:
        if endpoint.startswith(prefix):
            return True
    return False
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RequestBatcher:
    """Accumulates requests and flushes them in batches.

    A batch is flushed as soon as ``max_batch_size`` requests are queued, or
    ``delay_ms`` after the first request of a batch was queued, whichever
    comes first.

    ``flush_fn`` receives the list of queued request dicts (``id``,
    ``method``, ``endpoint``, ``body``, ``headers``) and may be sync or async.
    If it returns a list, results are matched to requests by position and any
    request without a matching result fails with ``RuntimeError``. Any other
    return value is delivered to every request in the batch. An exception
    raised by ``flush_fn`` is delivered to every request in the batch.

    Usage::

        batcher = RequestBatcher(flush_fn=my_http_batch, max_batch_size=10)
        result = await batcher.enqueue("POST", "/api/traces", body={...})
    """

    def __init__(
        self,
        flush_fn: Callable[[list[dict[str, Any]]], Any],
        max_batch_size: int = 10,
        delay_ms: int = 50,
    ) -> None:
        self._flush_fn = flush_fn
        self._max_batch_size = max_batch_size
        self._delay = delay_ms / 1000  # seconds, as loop.call_later expects
        self._queue: list[dict[str, Any]] = []
        self._pending: dict[str, asyncio.Future[Any]] = {}
        self._timer: asyncio.TimerHandle | None = None

    async def enqueue(
        self,
        method: str,
        endpoint: str,
        body: Any | None = None,
        headers: dict[str, str] | None = None,
    ) -> Any:
        """Queue one request and wait for its result from the next flush.

        Raises whatever exception the flush delivers for this request.
        """
        request_id = str(uuid.uuid4())
        loop = asyncio.get_running_loop()
        future: asyncio.Future[Any] = loop.create_future()
        self._pending[request_id] = future

        self._queue.append(
            {
                "id": request_id,
                "method": method,
                "endpoint": endpoint,
                "body": body,
                "headers": headers or {},
            }
        )

        if len(self._queue) >= self._max_batch_size:
            await self.flush()
        elif self._timer is None:
            # First request of a new batch: arm the delayed flush.
            self._timer = loop.call_later(self._delay, lambda: asyncio.ensure_future(self.flush()))

        return await future

    async def flush(self) -> None:
        """Send every queued request through ``flush_fn`` and settle its future."""
        if not self._queue:
            return

        if self._timer is not None:
            self._timer.cancel()
            self._timer = None

        batch = self._queue[:]
        self._queue.clear()

        try:
            results = self._flush_fn(batch)
            # flush_fn is typed as returning Any, so accept sync callables too.
            if inspect.isawaitable(results):
                results = await results
        except Exception as exc:
            for req in batch:
                self._reject(req["id"], exc)
            return

        if not isinstance(results, list):
            for req in batch:
                self._resolve(req["id"], results)
            return

        for req, result in zip(batch, results):
            self._resolve(req["id"], result)
        # Requests beyond the end of a short result list would otherwise wait
        # forever and stay in _pending; fail them explicitly instead.
        for req in batch[len(results):]:
            self._reject(
                req["id"],
                RuntimeError(f"Batch flush returned {len(results)} results for {len(batch)} requests"),
            )

    def _resolve(self, request_id: str, result: Any) -> None:
        """Deliver *result* to the waiting request, if it is still waiting."""
        fut = self._pending.pop(request_id, None)
        if fut and not fut.done():
            fut.set_result(result)

    def _reject(self, request_id: str, exc: BaseException) -> None:
        """Deliver *exc* to the waiting request, if it is still waiting."""
        fut = self._pending.pop(request_id, None)
        if fut and not fut.done():
            fut.set_exception(exc)

    def clear(self) -> None:
        """Cancel the pending timer and all unsettled requests, then drop the queue."""
        if self._timer is not None:
            self._timer.cancel()
            self._timer = None
        for fut in self._pending.values():
            if not fut.done():
                fut.cancel()
        self._queue.clear()
        self._pending.clear()

    def get_stats(self) -> dict[str, int]:
        """Return the current queue length and the configured batch size."""
        return {"queue_size": len(self._queue), "max_batch_size": self._max_batch_size}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
async def batch_process(
    items: list[T],
    processor: Callable[[T], Coroutine[Any, Any, R]],
    concurrency: int = 5,
    *,
    continue_on_error: bool = False,
    on_progress: Callable[[int, int], None] | None = None,
) -> list[R]:
    """Process items with bounded concurrency.

    Args:
        items: Items to process.
        processor: Async function to apply to each item.
        concurrency: Max concurrent tasks.
        continue_on_error: If True, a failing item is skipped and its result
            slot stays ``None``. If False, the first failure is raised and
            the remaining tasks are cancelled.
        on_progress: Optional callback (completed, total).

    Returns:
        Results in same order as items.

    Raises:
        TypeError: if *processor* is not callable or not an async callable.
        ValueError: if *concurrency* is below 1.
    """
    if not callable(processor):
        raise TypeError("batch_process requires processor to be callable")

    if not _is_async_callable(processor):
        raise TypeError("batch_process requires processor to be an async callable")

    if concurrency < 1:
        raise ValueError("batch_process concurrency must be at least 1")

    total = len(items)
    semaphore = asyncio.Semaphore(concurrency)
    results: list[R | None] = [None] * total
    completed = 0

    async def _run(index: int, item: T) -> None:
        nonlocal completed
        async with semaphore:
            try:
                results[index] = await processor(item)
            except Exception:
                if not continue_on_error:
                    raise
            finally:
                completed += 1
                if on_progress:
                    on_progress(completed, total)

    tasks = [asyncio.ensure_future(_run(index, item)) for index, item in enumerate(items)]
    try:
        await asyncio.gather(*tasks)
    except BaseException:
        # gather() does not cancel siblings when one task fails, so without
        # this they would keep running detached after we re-raise.
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        raise

    return results  # type: ignore[return-value]
|
evalgate_sdk/cache.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""LRU request cache with TTL expiration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
import time
|
|
8
|
+
from collections import OrderedDict
|
|
9
|
+
from typing import Any, TypeVar
|
|
10
|
+
|
|
11
|
+
# Generic type variable; nothing in the visible part of this module references it.
T = TypeVar("T")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class CacheTTL:
    """Named time-to-live presets, expressed in seconds."""

    SHORT = 30
    MEDIUM = 5 * 60
    LONG = 30 * 60
    HOUR = 60 * 60
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Endpoint prefixes whose GET responses should_cache() accepts.
_CACHEABLE_PREFIXES = (
    "/api/traces",
    "/api/evaluations",
    "/api/organizations",
    "/api/developer",
)


def should_cache(method: str, endpoint: str) -> bool:
    """Report whether a request is cacheable: a GET to a cacheable endpoint prefix."""
    if method.upper() != "GET":
        return False
    # str.startswith accepts a tuple and matches if any prefix matches.
    return endpoint.startswith(_CACHEABLE_PREFIXES)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_ttl(endpoint: str) -> int:
    """Pick the TTL preset for *endpoint*.

    Endpoints containing ``/organizations`` get the long TTL, those
    containing ``/developer`` the medium TTL, and everything else the short
    TTL. The organizations check wins when both substrings are present.
    """
    rules = (
        ("/organizations", CacheTTL.LONG),
        ("/developer", CacheTTL.MEDIUM),
    )
    for marker, ttl in rules:
        if marker in endpoint:
            return ttl
    return CacheTTL.SHORT
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RequestCache:
    """In-memory LRU cache with per-entry TTL.

    Entries are keyed by a SHA-256 digest of method, URL and params. Reading
    an entry marks it most recently used; when the cache is full the least
    recently used entry is evicted. Expired entries are dropped when they are
    next read. A ``max_size`` of zero or less disables storage.

    Usage::

        cache = RequestCache(max_size=500)
        cache.set("GET", "/api/traces", data, CacheTTL.SHORT)
        hit = cache.get("GET", "/api/traces")  # returns data or None
    """

    def __init__(self, max_size: int = 1000) -> None:
        self._max_size = max_size
        self._store: OrderedDict[str, _CacheEntry] = OrderedDict()

    @staticmethod
    def _key(method: str, url: str, params: Any = None) -> str:
        """Build the cache key; params are serialized with sorted keys."""
        raw = f"{method.upper()}:{url}"
        if params:
            raw += f":{json.dumps(params, sort_keys=True, default=str)}"
        return hashlib.sha256(raw.encode()).hexdigest()

    def get(self, method: str, url: str, params: Any = None) -> Any | None:
        """Return the cached data, or ``None`` on a miss or an expired entry."""
        key = self._key(method, url, params)
        entry = self._store.get(key)
        if entry is None:
            return None
        if entry.is_expired():
            del self._store[key]
            return None
        self._store.move_to_end(key)
        return entry.data

    def set(self, method: str, url: str, data: Any, ttl: int, params: Any = None) -> None:
        """Store *data* for *ttl* seconds, evicting the oldest entry if full."""
        if self._max_size <= 0:
            # Nothing can be stored; previously this raised KeyError from
            # popitem() on an empty store.
            return
        key = self._key(method, url, params)
        # Re-inserting moves an existing key to the most-recent position.
        self._store.pop(key, None)
        while len(self._store) >= self._max_size:
            self._store.popitem(last=False)
        # Record the URL so invalidate_pattern() can match it; the key itself
        # is a digest and cannot be searched.
        self._store[key] = _CacheEntry(data=data, ttl=ttl, url_hint=url)

    def invalidate(self, method: str, url: str, params: Any = None) -> None:
        """Remove the entry for exactly this method, URL and params, if any."""
        key = self._key(method, url, params)
        self._store.pop(key, None)

    def invalidate_pattern(self, pattern: str) -> None:
        """Remove all entries whose URL contains *pattern*."""
        to_remove = [k for k, v in self._store.items() if pattern in v.url_hint]
        for k in to_remove:
            del self._store[k]

    def clear(self) -> None:
        """Remove every entry."""
        self._store.clear()

    def get_stats(self) -> dict[str, int]:
        """Return the current entry count and the configured capacity."""
        return {"size": len(self._store), "max_size": self._max_size}


class _CacheEntry:
    """One cached value with its expiry deadline and the URL it was stored for."""

    __slots__ = ("data", "expires_at", "url_hint")

    def __init__(self, data: Any, ttl: int, url_hint: str = "") -> None:
        self.data = data
        # Monotonic clock, so wall-clock adjustments cannot affect expiry.
        self.expires_at = time.monotonic() + ttl
        self.url_hint = url_hint

    def is_expired(self) -> bool:
        """Return True once the deadline has passed."""
        return time.monotonic() > self.expires_at
|
|
evalgate_sdk/ci_context.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""CI context auto-detection (T10).
|
|
2
|
+
|
|
3
|
+
Port of the TypeScript SDK's ``ci-context.ts``.
|
|
4
|
+
Detects GitHub Actions, GitLab CI, CircleCI, and other CI providers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import contextlib
|
|
10
|
+
import os
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import Literal
|
|
13
|
+
|
|
14
|
+
# Provider tags that CIContext.provider can carry; "unknown" is both the
# default and the value used for the generic-CI fallback in detect_ci_context().
CIProvider = Literal["github", "gitlab", "circle", "azure", "jenkins", "unknown"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class CIContext:
    """CI metadata gathered from environment variables.

    The defaults describe "not running in CI": provider ``"unknown"``, every
    detail ``None`` and ``is_ci`` False.
    """

    # Which CI system was detected.
    provider: CIProvider = "unknown"
    # Repository identifier as reported by the provider.
    repo: str | None = None
    # Commit hash being built.
    sha: str | None = None
    # Branch name as reported by the provider.
    branch: str | None = None
    # Pull/merge request number, when the provider exposes one.
    pr: int | None = None
    # URL taken from the provider's environment for this run.
    run_url: str | None = None
    # User reported by the provider for this run.
    actor: str | None = None
    # True whenever any CI environment was detected, including an unrecognized one.
    is_ci: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _env_flag(name: str) -> bool:
    """Return True when env var *name* is set to "true" in any letter case."""
    return os.environ.get(name, "").strip().lower() == "true"


def detect_ci_context() -> CIContext:
    """Auto-detect CI provider and extract context from environment variables.

    Providers are probed in a fixed order: GitHub Actions, GitLab CI,
    CircleCI, Azure Pipelines, then Jenkins. If none match but a generic CI
    marker (``CI``, ``CONTINUOUS_INTEGRATION`` or ``BUILD_NUMBER``) is set,
    an ``"unknown"`` provider context with ``is_ci=True`` is returned.
    Otherwise the default non-CI context is returned.

    Boolean flags are compared case-insensitively, because Azure Pipelines
    sets ``TF_BUILD`` to ``True`` rather than ``true``.
    """
    if _env_flag("GITHUB_ACTIONS"):
        return _github_context()
    if _env_flag("GITLAB_CI"):
        return _gitlab_context()
    if _env_flag("CIRCLECI"):
        return _circle_context()
    if _env_flag("TF_BUILD"):
        return _azure_context()
    if os.environ.get("JENKINS_URL"):
        return _jenkins_context()
    if any(os.environ.get(k) for k in ("CI", "CONTINUOUS_INTEGRATION", "BUILD_NUMBER")):
        return CIContext(provider="unknown", is_ci=True)
    return CIContext()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _github_context() -> CIContext:
    """Build the CI context from GitHub Actions environment variables.

    A ``GITHUB_REF`` containing ``/pull/`` marks a pull-request build: the
    path segment right after ``/pull/`` is parsed as the PR number, and
    ``GITHUB_HEAD_REF`` (when non-empty) replaces ``GITHUB_REF_NAME`` as the
    branch. The run URL is only built when both repository and run id are set.
    """
    env = os.environ
    git_ref = env.get("GITHUB_REF", "")
    is_pull_request = "/pull/" in git_ref

    pull_number = None
    if is_pull_request:
        try:
            pull_number = int(git_ref.split("/pull/")[1].split("/")[0])
        except (ValueError, IndexError):
            # Unparseable ref: leave the PR number unset.
            pass

    server_url = env.get("GITHUB_SERVER_URL", "https://github.com")
    repository = env.get("GITHUB_REPOSITORY", "")
    run_id = env.get("GITHUB_RUN_ID", "")
    run_link = None
    if repository and run_id:
        run_link = f"{server_url}/{repository}/actions/runs/{run_id}"

    branch_name = env.get("GITHUB_REF_NAME")
    if is_pull_request:
        branch_name = env.get("GITHUB_HEAD_REF") or branch_name

    return CIContext(
        provider="github",
        repo=repository or None,
        sha=env.get("GITHUB_SHA"),
        branch=branch_name,
        pr=pull_number,
        run_url=run_link,
        actor=env.get("GITHUB_ACTOR"),
        is_ci=True,
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _gitlab_context() -> CIContext:
    """Build the CI context from GitLab CI environment variables.

    ``pr`` is the merge request IID when ``CI_MERGE_REQUEST_IID`` holds an
    integer, otherwise ``None``.
    """
    mr_iid = os.environ.get("CI_MERGE_REQUEST_IID")
    merge_request = None
    if mr_iid:
        # A malformed value must not abort CI detection; leave the number
        # unset, as _github_context() does.
        with contextlib.suppress(ValueError):
            merge_request = int(mr_iid)
    return CIContext(
        provider="gitlab",
        repo=os.environ.get("CI_PROJECT_PATH"),
        sha=os.environ.get("CI_COMMIT_SHA"),
        branch=os.environ.get("CI_COMMIT_REF_NAME"),
        pr=merge_request,
        run_url=os.environ.get("CI_JOB_URL"),
        actor=os.environ.get("GITLAB_USER_LOGIN"),
        is_ci=True,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _circle_context() -> CIContext:
    """Build the CI context from CircleCI environment variables.

    ``pr`` is the pull request number when ``CIRCLE_PR_NUMBER`` holds an
    integer, otherwise ``None``.
    """
    pr_num = os.environ.get("CIRCLE_PR_NUMBER")
    pull_number = None
    if pr_num:
        # A malformed value must not abort CI detection; leave the number
        # unset, as _github_context() does.
        with contextlib.suppress(ValueError):
            pull_number = int(pr_num)
    return CIContext(
        provider="circle",
        repo=os.environ.get("CIRCLE_PROJECT_REPONAME"),
        sha=os.environ.get("CIRCLE_SHA1"),
        branch=os.environ.get("CIRCLE_BRANCH"),
        pr=pull_number,
        run_url=os.environ.get("CIRCLE_BUILD_URL"),
        actor=os.environ.get("CIRCLE_USERNAME"),
        is_ci=True,
    )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _azure_context() -> CIContext:
    """Build the CI context from Azure Pipelines environment variables.

    ``pr`` is the pull request id when ``SYSTEM_PULLREQUEST_PULLREQUESTID``
    holds an integer, otherwise ``None``.
    """
    pr_id = os.environ.get("SYSTEM_PULLREQUEST_PULLREQUESTID")
    pull_request = None
    if pr_id:
        # A malformed value must not abort CI detection; leave the number
        # unset, as _github_context() does.
        with contextlib.suppress(ValueError):
            pull_request = int(pr_id)
    return CIContext(
        provider="azure",
        repo=os.environ.get("BUILD_REPOSITORY_NAME"),
        sha=os.environ.get("BUILD_SOURCEVERSION"),
        branch=os.environ.get("BUILD_SOURCEBRANCH"),
        pr=pull_request,
        # NOTE(review): this variable looks like a collection-level URI
        # rather than a link to the individual run; confirm that is intended.
        run_url=os.environ.get("SYSTEM_TEAMFOUNDATIONCOLLECTIONURI"),
        actor=os.environ.get("BUILD_REQUESTEDFOR"),
        is_ci=True,
    )
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _jenkins_context() -> CIContext:
    """Build the CI context from Jenkins environment variables.

    ``repo`` and ``pr`` are not populated for this provider.
    """
    env = os.environ
    commit = env.get("GIT_COMMIT")
    branch_name = env.get("GIT_BRANCH")
    build_link = env.get("BUILD_URL")
    triggered_by = env.get("BUILD_USER")
    return CIContext(
        provider="jenkins",
        sha=commit,
        branch=branch_name,
        run_url=build_link,
        actor=triggered_by,
        is_ci=True,
    )
|