evalgate-sdk 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. evalgate_sdk/__init__.py +707 -0
  2. evalgate_sdk/_version.py +3 -0
  3. evalgate_sdk/assertions.py +1362 -0
  4. evalgate_sdk/auto.py +247 -0
  5. evalgate_sdk/batch.py +174 -0
  6. evalgate_sdk/cache.py +111 -0
  7. evalgate_sdk/ci_context.py +123 -0
  8. evalgate_sdk/cli/__init__.py +111 -0
  9. evalgate_sdk/cli/api.py +261 -0
  10. evalgate_sdk/cli/cli_constants.py +20 -0
  11. evalgate_sdk/cli/commands.py +1041 -0
  12. evalgate_sdk/cli/config.py +228 -0
  13. evalgate_sdk/cli/env.py +43 -0
  14. evalgate_sdk/cli/formatters/types.py +132 -0
  15. evalgate_sdk/cli/golden_commands.py +322 -0
  16. evalgate_sdk/cli/manifest.py +301 -0
  17. evalgate_sdk/cli/new_commands.py +435 -0
  18. evalgate_sdk/cli/policy_packs.py +103 -0
  19. evalgate_sdk/cli/profiles.py +12 -0
  20. evalgate_sdk/cli/regression_gate.py +312 -0
  21. evalgate_sdk/cli/render/__init__.py +1 -0
  22. evalgate_sdk/cli/render/snippet.py +18 -0
  23. evalgate_sdk/cli/render/sort.py +29 -0
  24. evalgate_sdk/cli/report/__init__.py +1 -0
  25. evalgate_sdk/cli/report/build_check_report.py +209 -0
  26. evalgate_sdk/cli/traces.py +186 -0
  27. evalgate_sdk/cli/workspace.py +63 -0
  28. evalgate_sdk/client.py +609 -0
  29. evalgate_sdk/cluster.py +359 -0
  30. evalgate_sdk/collector.py +161 -0
  31. evalgate_sdk/constants.py +6 -0
  32. evalgate_sdk/context.py +151 -0
  33. evalgate_sdk/errors.py +236 -0
  34. evalgate_sdk/export.py +238 -0
  35. evalgate_sdk/formatters/__init__.py +11 -0
  36. evalgate_sdk/formatters/github.py +51 -0
  37. evalgate_sdk/formatters/human.py +68 -0
  38. evalgate_sdk/formatters/json_fmt.py +11 -0
  39. evalgate_sdk/formatters/pr_comment.py +80 -0
  40. evalgate_sdk/golden.py +426 -0
  41. evalgate_sdk/integrations/__init__.py +1 -0
  42. evalgate_sdk/integrations/anthropic.py +99 -0
  43. evalgate_sdk/integrations/autogen.py +62 -0
  44. evalgate_sdk/integrations/crewai.py +61 -0
  45. evalgate_sdk/integrations/langchain.py +100 -0
  46. evalgate_sdk/integrations/openai.py +155 -0
  47. evalgate_sdk/integrations/openai_eval.py +221 -0
  48. evalgate_sdk/local.py +144 -0
  49. evalgate_sdk/logger.py +123 -0
  50. evalgate_sdk/matchers.py +62 -0
  51. evalgate_sdk/otel.py +256 -0
  52. evalgate_sdk/pagination.py +145 -0
  53. evalgate_sdk/py.typed +0 -0
  54. evalgate_sdk/pytest_plugin.py +96 -0
  55. evalgate_sdk/reason_codes.py +103 -0
  56. evalgate_sdk/regression.py +196 -0
  57. evalgate_sdk/replay_decision.py +115 -0
  58. evalgate_sdk/runtime/__init__.py +50 -0
  59. evalgate_sdk/runtime/adapters/__init__.py +1 -0
  60. evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
  61. evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
  62. evalgate_sdk/runtime/context.py +68 -0
  63. evalgate_sdk/runtime/eval.py +318 -0
  64. evalgate_sdk/runtime/execution_mode.py +170 -0
  65. evalgate_sdk/runtime/executor.py +92 -0
  66. evalgate_sdk/runtime/registry.py +125 -0
  67. evalgate_sdk/runtime/run_report.py +249 -0
  68. evalgate_sdk/runtime/types.py +143 -0
  69. evalgate_sdk/snapshot.py +219 -0
  70. evalgate_sdk/streaming.py +124 -0
  71. evalgate_sdk/synthesize.py +226 -0
  72. evalgate_sdk/testing.py +128 -0
  73. evalgate_sdk/types.py +666 -0
  74. evalgate_sdk/utils/__init__.py +1 -0
  75. evalgate_sdk/utils/input_hash.py +42 -0
  76. evalgate_sdk/workflows.py +264 -0
  77. evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
  78. evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
  79. evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
  80. evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
evalgate_sdk/logger.py ADDED
@@ -0,0 +1,123 @@
1
+ """Structured logger with levels, child loggers, and request/response helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import sys
7
+ import time
8
+ from collections.abc import Callable
9
+ from typing import Any, Literal
10
+
11
# Level names accepted by the SDK logger.
LogLevel = Literal["trace", "debug", "info", "warn", "error"]

# Maps each SDK level name to the numeric level handed to the stdlib ``logging``
# module. "trace" uses 5, which is below ``logging.DEBUG``.
_LEVEL_MAP: dict[str, int] = {
    "trace": 5,
    "debug": logging.DEBUG,
    "info": logging.INFO,
    "warn": logging.WARNING,
    "error": logging.ERROR,
}
20
+
21
+
22
class Logger:
    """SDK logger with structured output, child loggers, and request tracing.

    Messages are either passed to a user-supplied ``handler(level, message, data)``
    or, when no handler is given, written through a stdlib logger named
    ``evalgate_sdk.<prefix>`` that logs to stderr.
    """

    def __init__(
        self,
        *,
        level: LogLevel = "info",
        prefix: str = "evalai",
        handler: Callable[[str, str, Any], None] | None = None,
    ) -> None:
        """Create a logger.

        Args:
            level: Minimum level that will be emitted.
            prefix: Suffix of the stdlib logger name; also the base for child prefixes.
            handler: Optional callback receiving ``(level, message, data)`` instead
                of the stdlib logger.
        """
        self._level = level
        self._prefix = prefix
        self._handler = handler

        # The custom "trace" level has no stdlib name, so records would be labelled
        # "Level 5". Register a readable name unless one is already registered.
        trace_level = _LEVEL_MAP["trace"]
        if logging.getLevelName(trace_level) == f"Level {trace_level}":
            logging.addLevelName(trace_level, "TRACE")

        self._py_logger = logging.getLogger(f"evalgate_sdk.{prefix}")
        self._py_logger.setLevel(_LEVEL_MAP.get(level, logging.INFO))
        # Attach the stderr handler only once per named stdlib logger.
        if not self._py_logger.handlers:
            h = logging.StreamHandler(sys.stderr)
            h.setFormatter(logging.Formatter("%(asctime)s [%(name)s] %(levelname)s %(message)s"))
            self._py_logger.addHandler(h)

    def set_level(self, level: LogLevel) -> None:
        """Change the minimum level for this logger and its stdlib logger."""
        self._level = level
        self._py_logger.setLevel(_LEVEL_MAP.get(level, logging.INFO))

    def is_level_enabled(self, level: LogLevel) -> bool:
        """Return True when *level* is at or above the configured level."""
        return _LEVEL_MAP.get(level, 0) >= _LEVEL_MAP.get(self._level, 0)

    def _emit(self, level: LogLevel, message: str, data: Any = None) -> None:
        """Send one message to the custom handler, or to the stdlib logger."""
        if self._handler:
            self._handler(level, message, data)
            return
        extra = f" {data}" if data is not None else ""
        py_level = _LEVEL_MAP.get(level, logging.INFO)
        self._py_logger.log(py_level, "%s%s", message, extra)

    def trace(self, message: str, data: Any = None) -> None:
        """Log *message* at trace level if enabled."""
        if self.is_level_enabled("trace"):
            self._emit("trace", message, data)

    def debug(self, message: str, data: Any = None) -> None:
        """Log *message* at debug level if enabled."""
        if self.is_level_enabled("debug"):
            self._emit("debug", message, data)

    def info(self, message: str, data: Any = None) -> None:
        """Log *message* at info level if enabled."""
        if self.is_level_enabled("info"):
            self._emit("info", message, data)

    def warn(self, message: str, data: Any = None) -> None:
        """Log *message* at warn level if enabled."""
        if self.is_level_enabled("warn"):
            self._emit("warn", message, data)

    def error(self, message: str, data: Any = None) -> None:
        """Log *message* at error level if enabled."""
        if self.is_level_enabled("error"):
            self._emit("error", message, data)

    def log_request(self, method: str, url: str, data: Any = None) -> None:
        """Log an outgoing request at debug level."""
        self.debug(f"→ {method} {url}", data)

    def log_response(self, method: str, url: str, status: int, duration_ms: float, data: Any = None) -> None:
        """Log a received response, with status and duration, at debug level."""
        self.debug(f"← {method} {url} {status} ({duration_ms:.0f}ms)", data)

    def child(self, prefix: str) -> Logger:
        """Return a new logger with the same level and handler and a ``parent:child`` prefix."""
        return Logger(
            level=self._level,
            prefix=f"{self._prefix}:{prefix}",
            handler=self._handler,
        )
89
+
90
+
91
class RequestLogger:
    """Convenience wrapper that times request/response pairs.

    A single start timestamp is kept per instance, so one instance tracks one
    in-flight request at a time.
    """

    def __init__(self, logger: Logger) -> None:
        """Wrap *logger*; no request has been started yet."""
        self._logger = logger
        # None means "on_request has not been called yet".
        self._start: float | None = None

    def on_request(self, method: str, url: str, body: Any = None) -> None:
        """Record the start time and log the outgoing request."""
        self._start = time.monotonic()
        self._logger.log_request(method, url, body)

    def on_response(self, method: str, url: str, status: int, body: Any = None) -> None:
        """Log the response with the elapsed time since the last ``on_request``.

        If no request was recorded, the duration is reported as 0.0 ms rather
        than the raw monotonic clock value.
        """
        started = self._start
        elapsed = 0.0 if started is None else (time.monotonic() - started) * 1000
        self._logger.log_response(method, url, status, elapsed, body)
105
+
106
+
107
# Process-wide default logger; created lazily by get_logger() and replaced by set_logger().
_global_logger: Logger | None = None
108
+
109
+
110
def create_logger(level: LogLevel = "info", **kwargs: Any) -> Logger:
    """Build a new :class:`Logger` at *level*, forwarding any extra keyword options."""
    new_logger = Logger(level=level, **kwargs)
    return new_logger
112
+
113
+
114
def get_logger() -> Logger:
    """Return the shared logger, creating it with default settings on first use."""
    global _global_logger
    if _global_logger is not None:
        return _global_logger
    _global_logger = create_logger()
    return _global_logger
119
+
120
+
121
def set_logger(logger: Logger) -> None:
    """Install *logger* as the shared logger returned by ``get_logger``."""
    global _global_logger
    _global_logger = logger
@@ -0,0 +1,62 @@
1
+ """Pytest plugin — provides ``to_pass_gate`` assertion for regression results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
def to_pass_gate(result: Any) -> bool:
    """Report whether an eval result passes the gate.

    Accepts any object exposing a ``passed`` attribute (for example
    ``OpenAIChatEvalResult``) or a dict with a ``"passed"`` key. Anything else
    is treated as not passing.

    Usage in pytest::

        from evalgate_sdk.matchers import to_pass_gate

        result = await openai_chat_eval(...)
        assert to_pass_gate(result)
    """
    # Attribute access takes precedence over mapping lookup.
    if hasattr(result, "passed"):
        verdict = result.passed
    elif isinstance(result, dict):
        verdict = result.get("passed", False)
    else:
        verdict = False
    return bool(verdict)
25
+
26
+
27
class GateAssertionError(AssertionError):
    """Raised when a gate assertion fails with diagnostic info.

    The failing result is kept on ``self.result``. The message reports
    ``passed_count``, ``total`` and ``score``, read from attributes or, for dict
    results, from keys. Missing values are shown as ``?``.
    """

    def __init__(self, result: Any) -> None:
        self.result = result
        missing = object()

        def read(field: str) -> Any:
            # Prefer attributes; fall back to dict keys so dict results
            # report real numbers instead of "?".
            value = getattr(result, field, missing)
            if value is missing and isinstance(result, dict):
                value = result.get(field, missing)
            return "?" if value is missing else value

        score = read("score")
        total = read("total")
        passed = read("passed_count")
        super().__init__(f"Gate assertion failed: {passed}/{total} passed (score={score})")
36
+
37
+
38
def assert_passes_gate(result: Any, message: str = "") -> None:
    """Fail with a descriptive error unless *result* passes the gate.

    Args:
        result: The eval result to check (must have a ``passed`` attribute or key).
        message: Optional custom error message to display on failure.

    Raises:
        AssertionError: With *message*, when the gate fails and a message was given.
        GateAssertionError: When the gate fails and no message was given.
    """
    if to_pass_gate(result):
        return
    if message:
        raise AssertionError(message)
    raise GateAssertionError(result)
49
+
50
+
51
+ # ── Pytest plugin ────────────────────────────────────────────────────
52
+
53
# Define the ``gate_result`` fixture only when pytest can be imported, so this
# module stays importable in environments without pytest.
try:
    import pytest

    @pytest.fixture
    def gate_result():
        """Fixture that provides a gate assertion helper."""
        return assert_passes_gate

except ImportError:
    # pytest is unavailable: skip fixture registration.
    pass
evalgate_sdk/otel.py ADDED
@@ -0,0 +1,256 @@
1
+ """OpenTelemetry exporter for WorkflowTracer spans (T6).
2
+
3
+ Port of the TypeScript SDK's ``otel.ts``.
4
+ Converts workflow tracer data into OTLP-compatible JSON payloads.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import random
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from typing import Any
14
+
15
+ import httpx
16
+
17
+
18
def _generate_trace_id() -> str:
    """Return a random trace ID as 32 lowercase hex characters."""
    bits = random.getrandbits(128)
    return format(bits, "032x")
21
+
22
+
23
def _generate_span_id() -> str:
    """Return a random span ID as 16 lowercase hex characters."""
    bits = random.getrandbits(64)
    return format(bits, "016x")
26
+
27
+
28
def _ms_to_ns(ms: float) -> int:
    """Scale a millisecond value to whole nanoseconds (fraction truncated)."""
    nanos = ms * 1_000_000
    return int(nanos)
31
+
32
+
33
@dataclass
class OTelAttribute:
    """A single key/value attribute attached to a span, event, or resource."""

    key: str
    value: Any

    def to_dict(self) -> dict[str, Any]:
        """Serialize to ``{"key": ..., "value": {<typeTag>: ...}}``.

        Booleans, ints and floats get ``boolValue``, ``intValue`` (as a string)
        and ``doubleValue``; every other value is stringified into ``stringValue``.
        """
        raw = self.value
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(raw, bool):
            typed = {"boolValue": raw}
        elif isinstance(raw, int):
            typed = {"intValue": str(raw)}
        elif isinstance(raw, float):
            typed = {"doubleValue": raw}
        else:
            typed = {"stringValue": str(raw)}
        return {"key": self.key, "value": typed}
46
+
47
+
48
@dataclass
class OTelEvent:
    """A named, timestamped event recorded on a span."""

    name: str
    time_unix_nano: int = 0
    attributes: list[OTelAttribute] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the event; the timestamp is emitted as a string."""
        serialized_attrs = [attr.to_dict() for attr in self.attributes]
        timestamp = str(self.time_unix_nano)
        return {
            "name": self.name,
            "timeUnixNano": timestamp,
            "attributes": serialized_attrs,
        }
60
+
61
+
62
@dataclass
class OTelSpan:
    """One span of a trace, with timing, status, attributes and events."""

    trace_id: str
    span_id: str
    name: str
    start_time_unix_nano: int
    end_time_unix_nano: int
    parent_span_id: str | None = None
    kind: int = 1  # SPAN_KIND_INTERNAL
    status_code: int = 1  # STATUS_CODE_OK
    status_message: str = ""
    attributes: list[OTelAttribute] = field(default_factory=list)
    events: list[OTelEvent] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the span with camelCase keys.

        Timestamps are emitted as strings. ``parentSpanId`` is included only
        when ``parent_span_id`` is truthy.
        """
        status = {"code": self.status_code, "message": self.status_message}
        payload: dict[str, Any] = {
            "traceId": self.trace_id,
            "spanId": self.span_id,
            "name": self.name,
            "kind": self.kind,
            "startTimeUnixNano": str(self.start_time_unix_nano),
            "endTimeUnixNano": str(self.end_time_unix_nano),
            "status": status,
            "attributes": [attr.to_dict() for attr in self.attributes],
            "events": [event.to_dict() for event in self.events],
        }
        if not self.parent_span_id:
            return payload
        payload["parentSpanId"] = self.parent_span_id
        return payload
91
+
92
+
93
@dataclass
class OTelExportPayload:
    """OTLP JSON export payload wrapping a list of resource-span dicts."""

    resource_spans: list[dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the payload as ``{"resourceSpans": [...]}``."""
        body: dict[str, Any] = {}
        body["resourceSpans"] = self.resource_spans
        return body
101
+
102
+
103
@dataclass
class OTelExporterOptions:
    """Configuration accepted by ``OTelExporter``."""

    # Collector base URL; when empty the exporter falls back to the
    # OTEL_EXPORTER_OTLP_ENDPOINT environment variable, then to localhost:4318.
    endpoint: str = ""
    # Reported as the ``service.name`` resource attribute.
    service_name: str = "evalgate-sdk"
    # Extra HTTP headers merged into the export request.
    headers: dict[str, str] = field(default_factory=dict)
    # Request timeout in milliseconds.
    timeout_ms: int = 10_000
109
+
110
+
111
class OTelExporter:
    """Exports evaluation data as OpenTelemetry spans.

    Builds OTLP-style JSON payloads from workflow tracer spans or evaluation
    run results, and can POST them to ``<endpoint>/v1/traces``.
    """

    def __init__(self, options: OTelExporterOptions | None = None) -> None:
        """Resolve the endpoint, service name, headers and timeout.

        The endpoint comes from *options*, then ``OTEL_EXPORTER_OTLP_ENDPOINT``,
        then ``http://localhost:4318``.
        """
        opts = options or OTelExporterOptions()
        self._endpoint = opts.endpoint or os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
        self._service_name = opts.service_name
        self._headers = opts.headers
        self._timeout = opts.timeout_ms / 1000.0  # httpx expects seconds

    def export_from_tracer(self, tracer: Any) -> OTelExportPayload:
        """Convert a WorkflowTracer into an OTLP payload.

        *tracer* should expose ``.spans`` (list of dicts with name, start_time,
        end_time, metadata, parent_span_id, etc.). Objects with the same
        attribute names are accepted too, but their metadata is not exported.
        A span without an ``end_time`` is emitted with zero duration.
        """
        trace_id = _generate_trace_id()
        spans: list[OTelSpan] = []

        for raw in getattr(tracer, "spans", []):
            span_id = _generate_span_id()
            attrs: list[OTelAttribute] = []

            if isinstance(raw, dict):
                name = raw.get("name", "unknown")
                start_ms = raw.get("start_time", 0)
                end_ms = raw.get("end_time")
                parent = raw.get("parent_span_id")
                # Tolerate an explicit ``metadata: None``.
                for k, v in (raw.get("metadata") or {}).items():
                    attrs.append(OTelAttribute(key=f"evalgate.{k}", value=v))
            else:
                name = getattr(raw, "name", "unknown")
                start_ms = getattr(raw, "start_time", 0)
                end_ms = getattr(raw, "end_time", None)
                parent = getattr(raw, "parent_span_id", None)

            start_ns = _ms_to_ns(start_ms)
            # A missing end time falls back to the start time, which is already
            # in nanoseconds and must not be converted a second time.
            end_ns = start_ns if end_ms is None else _ms_to_ns(end_ms)

            # NOTE(review): parent_span_id is forwarded verbatim while span_id is
            # freshly generated here; whether parent links resolve depends on what
            # the tracer stores in parent_span_id — confirm against the tracer.
            attrs.append(OTelAttribute(key="evalgate.service", value=self._service_name))
            spans.append(
                OTelSpan(
                    trace_id=trace_id,
                    span_id=span_id,
                    name=name,
                    start_time_unix_nano=start_ns,
                    end_time_unix_nano=end_ns,
                    parent_span_id=parent,
                    attributes=attrs,
                )
            )

        return self._build_payload(spans)

    def export_run_result(
        self,
        run_id: str,
        results: list[dict[str, Any]],
        start_time_ms: float | None = None,
        end_time_ms: float | None = None,
    ) -> OTelExportPayload:
        """Convert evaluation run results into an OTLP payload.

        Emits one root span for the run plus one child span per result. Missing
        (or zero) run timestamps default to the current time.
        """
        trace_id = _generate_trace_id()
        now_ms = time.time() * 1000
        root_start = _ms_to_ns(start_time_ms or now_ms)
        root_end = _ms_to_ns(end_time_ms or now_ms)

        root_span_id = _generate_span_id()
        spans: list[OTelSpan] = [
            OTelSpan(
                trace_id=trace_id,
                span_id=root_span_id,
                name=f"evalgate.run.{run_id}",
                start_time_unix_nano=root_start,
                end_time_unix_nano=root_end,
                attributes=[
                    OTelAttribute(key="evalgate.run_id", value=run_id),
                    OTelAttribute(key="evalgate.service", value=self._service_name),
                ],
            )
        ]

        for r in results:
            span_id = _generate_span_id()
            # ``or 0`` treats an explicit None duration as zero.
            duration_ms = r.get("duration_ms") or 0
            s_start = _ms_to_ns(r.get("start_time_ms", now_ms))
            s_end = s_start + _ms_to_ns(duration_ms)

            status = 1 if r.get("passed") else 2  # OK or ERROR
            attrs = [
                OTelAttribute(key="evalgate.test_id", value=r.get("test_id", "")),
                OTelAttribute(key="evalgate.test_name", value=r.get("test_name", "")),
                OTelAttribute(key="evalgate.score", value=r.get("score", 0)),
                OTelAttribute(key="evalgate.passed", value=r.get("passed", False)),
            ]
            spans.append(
                OTelSpan(
                    trace_id=trace_id,
                    span_id=span_id,
                    parent_span_id=root_span_id,
                    name=f"evalgate.spec.{r.get('test_name', 'unknown')}",
                    start_time_unix_nano=s_start,
                    end_time_unix_nano=s_end,
                    status_code=status,
                    # Keep the status message a string even when ``error`` is
                    # None or a non-string object.
                    status_message=str(r.get("error") or ""),
                    attributes=attrs,
                )
            )

        return self._build_payload(spans)

    def _build_payload(self, spans: list[OTelSpan]) -> OTelExportPayload:
        """Wrap *spans* in a single resource/scope envelope."""
        resource = {
            "resource": {
                "attributes": [
                    OTelAttribute(key="service.name", value=self._service_name).to_dict(),
                ],
            },
            "scopeSpans": [
                {
                    "scope": {"name": "evalgate-sdk"},
                    "spans": [s.to_dict() for s in spans],
                }
            ],
        }
        return OTelExportPayload(resource_spans=[resource])

    async def send(self, payload: OTelExportPayload) -> bool:
        """POST the OTLP payload to the collector endpoint.

        Returns True for responses with a status below 400.
        Returns False on connection/timeout errors instead of raising.
        """
        url = f"{self._endpoint.rstrip('/')}/v1/traces"
        try:
            async with httpx.AsyncClient(timeout=self._timeout) as client:
                resp = await client.post(
                    url,
                    json=payload.to_dict(),
                    headers={"Content-Type": "application/json", **self._headers},
                )
                return resp.status_code < 400
        except (httpx.ConnectError, httpx.TimeoutException, OSError):
            return False
252
+
253
+
254
def create_otel_exporter(options: OTelExporterOptions | None = None) -> OTelExporter:
    """Build an :class:`OTelExporter`, optionally configured by *options*."""
    exporter = OTelExporter(options)
    return exporter
@@ -0,0 +1,145 @@
1
+ """Cursor-based pagination helpers — iterators, auto-pagination, and encoding."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import json
7
+ from collections.abc import AsyncIterator, Callable
8
+ from typing import Any, Generic, TypeVar
9
+
10
# Item type carried by the generic pagination containers below.
T = TypeVar("T")
11
+
12
+
13
def encode_cursor(data: Any) -> str:
    """Serialize *data* to JSON and return it as a URL-safe base64 cursor string."""
    serialized = json.dumps(data)
    token = base64.urlsafe_b64encode(serialized.encode())
    return token.decode()
16
+
17
+
18
def decode_cursor(cursor: str) -> Any:
    """Decode a base64 cursor string back to its original value.

    Cursors whose trailing ``=`` padding was stripped (common when cursors
    travel in URLs) are accepted as well.

    Raises:
        binascii.Error: If *cursor* is not valid base64.
        ValueError: If the decoded bytes are not valid UTF-8 JSON.
    """
    # Restore padding to a multiple of four characters; no-op when already padded.
    padded = cursor + "=" * (-len(cursor) % 4)
    return json.loads(base64.urlsafe_b64decode(padded.encode()).decode())
21
+
22
+
23
class PaginatedResponse(Generic[T]):
    """One page of results together with its pagination metadata.

    Attributes are ``data`` (the page items), ``has_more`` (whether further
    pages exist) and ``total`` (overall item count, or None when not supplied).
    """

    def __init__(self, data: list[T], has_more: bool, total: int | None = None) -> None:
        self.data, self.has_more, self.total = data, has_more, total
30
+
31
+
32
class PaginatedIterator(Generic[T]):
    """Async iterator that fetches pages on demand using offset/limit.

    Each iteration step awaits ``fetch_fn(offset, limit)`` and yields one page
    (a list of items). The fetch result may be a dict with ``data`` and
    ``has_more``/``hasMore``, a ``PaginatedResponse``, a list, or any iterable.

    Usage::

        pages = PaginatedIterator(fetch_fn, limit=20)
        async for page in pages:
            for item in page:
                print(item)

        # or collect everything
        all_items = await pages.to_list()
    """

    def __init__(
        self,
        fetch_fn: Callable[[int, int], Any],
        limit: int = 20,
    ) -> None:
        self._fetch_fn = fetch_fn
        self._limit = limit
        self._offset = 0
        self._exhausted = False

    def __aiter__(self) -> PaginatedIterator[T]:
        return self

    def _unpack(self, fetched: Any) -> tuple[Any, Any]:
        """Split a fetch result into ``(page_items, has_more)``."""
        if isinstance(fetched, dict):
            return fetched.get("data", []), fetched.get("has_more", fetched.get("hasMore", False))
        if isinstance(fetched, PaginatedResponse):
            return fetched.data, fetched.has_more
        # Bare sequences carry no metadata: a full page implies more may follow.
        items = fetched if isinstance(fetched, list) else list(fetched)
        return items, len(items) >= self._limit

    async def __anext__(self) -> list[T]:
        if self._exhausted:
            raise StopAsyncIteration

        fetched = await self._fetch_fn(self._offset, self._limit)
        page, more = self._unpack(fetched)

        # An empty page always ends iteration, whatever has_more claimed.
        if not page:
            self._exhausted = True
            raise StopAsyncIteration

        self._offset += len(page)
        self._exhausted = not more
        return page

    async def to_list(self) -> list[T]:
        """Drain the remaining pages and return their items as one flat list."""
        collected: list[T] = []
        async for chunk in self:
            collected.extend(chunk)
        return collected

    def reset(self) -> None:
        """Rewind to the first page so the iterator can be consumed again."""
        self._exhausted = False
        self._offset = 0
98
+
99
+
100
def create_paginated_iterator(
    fetch_fn: Callable[[int, int], Any],
    limit: int = 20,
) -> PaginatedIterator[Any]:
    """Wrap *fetch_fn* in a :class:`PaginatedIterator` that requests *limit* items per page."""
    iterator: PaginatedIterator[Any] = PaginatedIterator(fetch_fn, limit)
    return iterator
106
+
107
+
108
async def auto_paginate(
    fetch_fn: Callable[[int, int], Any],
    limit: int = 20,
) -> AsyncIterator[Any]:
    """Yield items one at a time, fetching further pages as needed."""
    async for chunk in PaginatedIterator(fetch_fn, limit):
        for entry in chunk:
            yield entry
117
+
118
+
119
def create_pagination_meta(
    items: list[Any],
    limit: int,
    offset: int,
    total: int | None = None,
) -> dict[str, Any]:
    """Create pagination metadata for an API response.

    Args:
        items: Items on the current page.
        limit: Requested page size.
        offset: Index of the first item on this page.
        total: Overall item count, or None when unknown.

    Returns:
        A dict with ``limit``, ``offset``, ``count``, ``total`` and ``has_more``.
        When *total* is known (including 0), ``has_more`` is derived from it;
        otherwise a full page is taken to mean more items may follow.
    """
    count = len(items)
    # Compare against None so a known total of 0 is not mistaken for "unknown".
    if total is not None:
        has_more = (offset + count) < total
    else:
        has_more = count >= limit
    return {
        "limit": limit,
        "offset": offset,
        "count": count,
        "total": total,
        "has_more": has_more,
    }
133
+
134
+
135
def parse_pagination_params(
    params: dict[str, Any] | None = None,
    default_limit: int = 20,
    max_limit: int = 100,
) -> dict[str, int]:
    """Parse and clamp pagination params.

    Args:
        params: Mapping that may contain ``limit`` and ``offset`` (ints or
            numeric strings). Missing or None values fall back to the defaults.
        default_limit: Limit used when none is supplied.
        max_limit: Upper bound applied to the parsed limit.

    Returns:
        ``{"limit": ..., "offset": ...}`` with limit clamped to at least 1 and
        at most *max_limit*, and offset clamped to at least 0. When *params* is
        None the defaults are returned unchanged.

    Raises:
        ValueError: If a supplied value cannot be converted to int.
    """
    if params is None:
        return {"limit": default_limit, "offset": 0}

    raw_limit = params.get("limit")
    raw_offset = params.get("offset")
    limit = default_limit if raw_limit is None else int(raw_limit)
    offset = 0 if raw_offset is None else int(raw_offset)

    # Raise non-positive limits to 1 first, so max_limit stays the hard ceiling.
    limit = min(max(limit, 1), max_limit)
    return {"limit": limit, "offset": max(offset, 0)}
evalgate_sdk/py.typed ADDED
File without changes
@@ -0,0 +1,96 @@
1
+ """Pytest plugin for EvalGate assertions (T9).
2
+
3
+ Provides custom pytest assertions for eval results and a plugin
4
+ that can be auto-registered via pyproject.toml entry points.
5
+
6
+ Usage::
7
+
8
+ from evalgate_sdk.pytest_plugin import assert_passes_gate, assert_score_above
9
+
10
+ def test_chatbot_quality(eval_result):
11
+ assert_passes_gate(eval_result)
12
+ assert_score_above(eval_result, 90.0)
13
+
14
+ Note: pytest is imported lazily so the SDK can be used without pytest installed.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from typing import TYPE_CHECKING, Any
20
+
21
+ if TYPE_CHECKING:
22
+ import pytest as _pytest_type # noqa: F401
23
+
24
+
25
def _get_pytest() -> Any:
    """Import pytest on demand and return the module.

    Raises:
        ImportError: With installation guidance when pytest is not installed.
    """
    try:
        import pytest
    except ImportError as exc:
        raise ImportError(
            "pytest is required for evalgate_sdk.pytest_plugin. Install with: pip install pytest"
        ) from exc
    return pytest
35
+
36
+
37
def assert_passes_gate(result: Any, message: str = "") -> None:
    """Assert that an eval result passes the quality gate.

    *result* should have a ``passed`` attribute (or key) that is truthy.

    The check itself lives in :func:`evalgate_sdk.matchers.assert_passes_gate`;
    it is imported at call time and invoked with the same arguments.
    """
    from evalgate_sdk import matchers

    matchers.assert_passes_gate(result, message)
47
+
48
+
49
def assert_score_above(result: Any, threshold: float, message: str = "") -> None:
    """Fail the test unless the result's score is at least *threshold*.

    A score equal to *threshold* passes. A missing score fails the test.
    *message*, when given, replaces the default failure text.
    """
    fail = _get_pytest().fail
    score = _get_field(result, "score")
    if score is None:
        fail(message or "Result has no 'score' field")
    too_low = score < threshold
    if too_low:
        fail(message or f"Score {score} is below threshold {threshold}")
57
+
58
+
59
def assert_score_between(result: Any, min_score: float, max_score: float, message: str = "") -> None:
    """Fail the test unless the score lies within ``[min_score, max_score]`` inclusive.

    A missing score fails the test. *message*, when given, replaces the
    default failure text.
    """
    fail = _get_pytest().fail
    score = _get_field(result, "score")
    if score is None:
        fail(message or "Result has no 'score' field")
    out_of_range = score < min_score or score > max_score
    if out_of_range:
        fail(message or f"Score {score} not in [{min_score}, {max_score}]")
67
+
68
+
69
def assert_no_errors(result: Any, message: str = "") -> None:
    """Fail the test if the result carries an error.

    The result is considered errored when its ``error`` field is truthy or its
    ``status`` field equals ``"error"``.
    """
    fail = _get_pytest().fail
    found_error = _get_field(result, "error")
    found_status = _get_field(result, "status")
    if found_error:
        fail(message or f"Eval result has error: {found_error}")
    if found_status == "error":
        fail(message or "Eval result status is 'error'")
78
+
79
+
80
def assert_all_assertions_passed(result: Any, message: str = "") -> None:
    """Fail the test at the first sub-assertion whose ``passed`` field is falsy.

    Sub-assertions are read from the result's ``assertions`` field; a missing
    or empty field means there is nothing to check.
    """
    fail = _get_pytest().fail
    sub_results = _get_field(result, "assertions") or []
    for index, sub in enumerate(sub_results):
        if _get_field(sub, "passed"):
            continue
        # Label the failure by assertion type, then name, then position.
        label = _get_field(sub, "assertion_type") or _get_field(sub, "name") or f"assertion-{index}"
        detail = _get_field(sub, "message") or "failed"
        fail(message or f"Sub-assertion '{label}' failed: {detail}")
90
+
91
+
92
def _get_field(obj: Any, field: str) -> Any:
    """Read *field* from a dict key or an object attribute; None when absent."""
    return obj.get(field) if isinstance(obj, dict) else getattr(obj, field, None)
+ return getattr(obj, field, None)