evalgate-sdk 3.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalgate_sdk-3.3.1/.gitignore +13 -0
- evalgate_sdk-3.3.1/CHANGELOG.md +146 -0
- evalgate_sdk-3.3.1/PKG-INFO +608 -0
- evalgate_sdk-3.3.1/README.md +556 -0
- evalgate_sdk-3.3.1/pyproject.toml +80 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/__init__.py +707 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/_version.py +3 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/assertions.py +1362 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/auto.py +247 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/batch.py +174 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cache.py +111 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/ci_context.py +123 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/__init__.py +111 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/api.py +261 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/cli_constants.py +20 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/commands.py +1041 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/config.py +228 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/env.py +43 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/formatters/types.py +132 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/golden_commands.py +322 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/manifest.py +301 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/new_commands.py +435 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/policy_packs.py +103 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/profiles.py +12 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/regression_gate.py +312 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/render/__init__.py +1 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/render/snippet.py +18 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/render/sort.py +29 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/report/__init__.py +1 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/report/build_check_report.py +209 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/traces.py +186 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cli/workspace.py +63 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/client.py +609 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/cluster.py +359 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/collector.py +161 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/constants.py +6 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/context.py +151 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/errors.py +236 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/export.py +238 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/__init__.py +11 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/github.py +51 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/human.py +68 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/json_fmt.py +11 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/pr_comment.py +80 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/golden.py +426 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/__init__.py +1 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/anthropic.py +99 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/autogen.py +62 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/crewai.py +61 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/langchain.py +100 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/openai.py +155 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/openai_eval.py +221 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/local.py +144 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/logger.py +123 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/matchers.py +62 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/otel.py +256 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/pagination.py +145 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/py.typed +0 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/pytest_plugin.py +96 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/reason_codes.py +103 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/regression.py +196 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/replay_decision.py +115 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/__init__.py +50 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/adapters/__init__.py +1 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/context.py +68 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/eval.py +318 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/execution_mode.py +170 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/executor.py +92 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/registry.py +125 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/run_report.py +249 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/types.py +143 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/snapshot.py +219 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/streaming.py +124 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/synthesize.py +226 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/testing.py +128 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/types.py +666 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/utils/__init__.py +1 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/utils/input_hash.py +42 -0
- evalgate_sdk-3.3.1/src/evalgate_sdk/workflows.py +264 -0
- evalgate_sdk-3.3.1/tests/__init__.py +0 -0
- evalgate_sdk-3.3.1/tests/test_assertions.py +245 -0
- evalgate_sdk-3.3.1/tests/test_assertions_async.py +210 -0
- evalgate_sdk-3.3.1/tests/test_ci_context.py +104 -0
- evalgate_sdk-3.3.1/tests/test_cli_golden_commands.py +232 -0
- evalgate_sdk-3.3.1/tests/test_cli_new_commands.py +211 -0
- evalgate_sdk-3.3.1/tests/test_client.py +164 -0
- evalgate_sdk-3.3.1/tests/test_collector.py +122 -0
- evalgate_sdk-3.3.1/tests/test_contract_payloads.py +218 -0
- evalgate_sdk-3.3.1/tests/test_decorators_and_integrations.py +411 -0
- evalgate_sdk-3.3.1/tests/test_errors.py +87 -0
- evalgate_sdk-3.3.1/tests/test_execution_mode.py +146 -0
- evalgate_sdk-3.3.1/tests/test_formatters.py +114 -0
- evalgate_sdk-3.3.1/tests/test_local_storage.py +85 -0
- evalgate_sdk-3.3.1/tests/test_new_modules.py +629 -0
- evalgate_sdk-3.3.1/tests/test_otel.py +112 -0
- evalgate_sdk-3.3.1/tests/test_parity_gap_modules.py +572 -0
- evalgate_sdk-3.3.1/tests/test_parity_gaps.py +438 -0
- evalgate_sdk-3.3.1/tests/test_prod_hardening.py +263 -0
- evalgate_sdk-3.3.1/tests/test_pytest_plugin.py +101 -0
- evalgate_sdk-3.3.1/tests/test_run_report.py +179 -0
- evalgate_sdk-3.3.1/tests/test_runtime_dsl.py +254 -0
- evalgate_sdk-3.3.1/tests/test_testing.py +100 -0
- evalgate_sdk-3.3.1/tests/test_types.py +71 -0
- evalgate_sdk-3.3.1/tests/test_workflows.py +171 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to the EvalGate Python SDK will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
Version numbering is aligned with the TypeScript SDK (`@evalgate/sdk`) and the platform API.
|
|
9
|
+
|
|
10
|
+
**Version history note:** The Python SDK jumped from 1.0.x → 1.9.x → 2.0.0 to stay in sync with the TypeScript SDK. The TypeScript SDK had many releases (1.1–1.9) before the Python SDK existed. We now align both SDKs on the same major.minor version.
|
|
11
|
+
|
|
12
|
+
## [3.3.1] - 2026-03-24
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
- **PyPI distribution name** — package metadata publishes under `evalgate-sdk` on PyPI (import package remains `evalgate_sdk`).
|
|
16
|
+
- **Install and extras guidance** — CLI/runtime error messages, examples, and docs now consistently use `evalgate-sdk`, `evalgate-sdk[cli]`, `evalgate-sdk[openai]`, and `evalgate-sdk[anthropic]`.
|
|
17
|
+
- **Version alignment** — bumped Python SDK version and spec constants to `3.3.1` for synchronized release metadata.
|
|
18
|
+
|
|
19
|
+
## [3.3.0] - 2026-03-23
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
- **Version bump** — Python SDK version and spec constants bumped to 3.3.0, aligned with TypeScript SDK and platform.
|
|
23
|
+
|
|
24
|
+
## [3.2.7] - 2026-03-22
|
|
25
|
+
|
|
26
|
+
### Changed
|
|
27
|
+
|
|
28
|
+
- **Release continuation** — Advanced the Python SDK release target to `3.2.7` to stay aligned with the coordinated npm and platform release after another day of shipped fixes.
|
|
29
|
+
- **Package metadata alignment** — Updated `pyproject.toml` and release validation inputs so the next PyPI publish uses the new version number consistently.
|
|
30
|
+
|
|
31
|
+
### Fixed
|
|
32
|
+
|
|
33
|
+
- **Assertion contract normalization** — sync assertion helpers now consistently return `AssertionResult` with boolean truthiness, including schema, syntax, instruction-following, and required-field checks.
|
|
34
|
+
- **Schema and negation parity** — `matches_schema()` / `expect(...).to_match_json()` accept JSON strings and embedded JSON snippets, and `expect(...).not_` now mirrors the TypeScript fluent negation flow.
|
|
35
|
+
- **`run_assertions()` compatibility** — legacy boolean and mapping-returning assertion callables are coerced into `AssertionResult` so mixed assertion batches no longer fail at runtime.
|
|
36
|
+
- **Starter templates** — Python scaffolded eval examples now use `.passed` when converting assertion results into runtime `create_result()` payloads.
|
|
37
|
+
- **Async assertion configuration parity** — `configure_assertions()` now accepts keyword arguments like `provider=`, `api_key=`, `model=`, and `timeout_ms=` in addition to `AssertionLLMConfig(...)`.
|
|
38
|
+
- **Optional dependency guidance** — async assertion errors, docstrings, and README examples now point to the correct extras such as `pauly4010-evalgate-sdk[openai]` and `pauly4010-evalgate-sdk[anthropic]` (superseded in 3.3.1 by `evalgate-sdk[openai]` and `evalgate-sdk[anthropic]`).
|
|
39
|
+
- **PyPI package metadata** — author metadata now points to EvalGate organization branding instead of a personal handle.
|
|
40
|
+
|
|
41
|
+
## [3.0.2] - 2026-03-09
|
|
42
|
+
|
|
43
|
+
### Changed
|
|
44
|
+
|
|
45
|
+
- **Version alignment** — bumped to v3.0.2 to stay in sync with the platform API and `@evalgate/sdk` TypeScript SDK
|
|
46
|
+
- **Autonomous workflow guidance** — clarified that the newest autonomous loop, clustering, synthesis, and daemon workflows currently ship in the TypeScript CLI; the Python SDK remains compatible with the shared EvalGate dataset, run-artifact, and CI flows while those advanced controls mature cross-SDK.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## [2.2.1] - 2026-03-03
|
|
51
|
+
|
|
52
|
+
### Changed
|
|
53
|
+
|
|
54
|
+
- **Version alignment** — bumped to v2.2.1 to stay in sync with `@evalgate/sdk` TypeScript SDK
|
|
55
|
+
- No Python-specific changes in this release; see [TypeScript SDK CHANGELOG](../../src/packages/sdk/CHANGELOG.md) for 2.2.1 details
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## [2.2.0] - 2026-03-03
|
|
60
|
+
|
|
61
|
+
### Changed
|
|
62
|
+
|
|
63
|
+
- **Version alignment** — bumped to v2.2.0 to stay in sync with `@evalgate/sdk` TypeScript SDK
|
|
64
|
+
- No Python-specific changes in this release; see [TypeScript SDK CHANGELOG](../../src/packages/sdk/CHANGELOG.md) for full 2.2.0 details
|
|
65
|
+
|
|
66
|
+
## [2.1.2] - 2026-03-02
|
|
67
|
+
|
|
68
|
+
### Fixed
|
|
69
|
+
|
|
70
|
+
- **Type safety** — aligned with platform 2.1.2; all CI checks passing
|
|
71
|
+
|
|
72
|
+
## [2.1.1] - 2026-03-02
|
|
73
|
+
|
|
74
|
+
### Fixed
|
|
75
|
+
|
|
76
|
+
- **Contract payload validation** — Fixed ruff errors in `test_contract_payloads.py`
|
|
77
|
+
- **CI integration** — Resolved test suite compatibility issues
|
|
78
|
+
- **Linting compliance** — Fixed SIM102, E501, SIM105, I001, SIM300 ruff violations
|
|
79
|
+
|
|
80
|
+
### Changed
|
|
81
|
+
|
|
82
|
+
- **Test coverage** — Improved test matrix for TypeScript/Python SDK compatibility
|
|
83
|
+
- **Documentation** — Updated README with PyPI downloads badge and GitHub stars
|
|
84
|
+
|
|
85
|
+
## [2.0.0] - 2026-03-01
|
|
86
|
+
|
|
87
|
+
### Breaking
|
|
88
|
+
|
|
89
|
+
- **Rebrand:** Package renamed `pauly4010-evalai-sdk` → `pauly4010-evalgate-sdk`, module `evalai_sdk` → `evalgate_sdk`
|
|
90
|
+
- **CLI:** `evalai` → `evalgate`
|
|
91
|
+
- **Config:** `.evalai/` → `.evalgate/` (legacy `.evalai/` still read, with deprecation warning)
|
|
92
|
+
- **Env vars:** `EVALAI_*` → `EVALGATE_*` (legacy `EVALAI_*` still work, with deprecation warning)
|
|
93
|
+
- **Error class:** `EvalAIError` → `EvalGateError`
|
|
94
|
+
|
|
95
|
+
### Added
|
|
96
|
+
|
|
97
|
+
- Deprecation warnings when using legacy env vars or config paths
|
|
98
|
+
|
|
99
|
+
## [1.9.1] - 2026-03-01
|
|
100
|
+
|
|
101
|
+
### Fixed
|
|
102
|
+
|
|
103
|
+
- Align `SPEC_VERSION` with OpenAPI spec 1.9.1
|
|
104
|
+
- Ruff lint and format fixes (SIM102, E501, SIM105, I001, SIM300)
|
|
105
|
+
|
|
106
|
+
### Changed
|
|
107
|
+
|
|
108
|
+
- Package metadata: Production/Stable status, improved description and keywords
|
|
109
|
+
- README: Added PyPI downloads badge, GitHub stars, status section, changelog link
|
|
110
|
+
|
|
111
|
+
## [1.9.0] - 2026-02-27
|
|
112
|
+
|
|
113
|
+
### Added
|
|
114
|
+
|
|
115
|
+
- Full parity with TypeScript SDK 1.9.0
|
|
116
|
+
- `evalai ci` CLI command — one-command CI loop
|
|
117
|
+
- Run artifact retention and diff system
|
|
118
|
+
- Impact analysis integration
|
|
119
|
+
- Schema versioning for run reports
|
|
120
|
+
|
|
121
|
+
### Changed
|
|
122
|
+
|
|
123
|
+
- Exit codes standardized: 0=clean, 1=regressions, 2=config/infra
|
|
124
|
+
- CLI output improvements for CI environments
|
|
125
|
+
|
|
126
|
+
## [1.0.1] - 2026-02-26
|
|
127
|
+
|
|
128
|
+
### Fixed
|
|
129
|
+
|
|
130
|
+
- CLI init template and credential resolution
|
|
131
|
+
- Run output formatting
|
|
132
|
+
|
|
133
|
+
## [1.0.0] - 2026-02-25
|
|
134
|
+
|
|
135
|
+
### Added
|
|
136
|
+
|
|
137
|
+
- Initial Python SDK release
|
|
138
|
+
- `AIEvalClient` — async HTTP client for EvalGate API
|
|
139
|
+
- 20+ assertions: `expect()`, `to_contain`, `to_not_contain_pii`, `to_be_professional`, etc.
|
|
140
|
+
- Test suites: `create_test_suite`, `TestSuiteConfig`, `TestSuiteCase`
|
|
141
|
+
- Integrations: OpenAI, Anthropic, LangChain, CrewAI, AutoGen tracing
|
|
142
|
+
- Workflow tracing: `WorkflowTracer`, handoffs, cost tracking
|
|
143
|
+
- Regression gates: `evaluate_regression`, `to_pass_gate`
|
|
144
|
+
- CLI: `evalai init`, `evalai run`, `evalai gate`, `evalai ci`, `evalai doctor`
|
|
145
|
+
- Batch processing, caching, pagination, structured errors
|
|
146
|
+
- Full type hints (`py.typed`), mypy and Pyright compatible
|