evalgate-sdk 3.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. evalgate_sdk-3.3.1/.gitignore +13 -0
  2. evalgate_sdk-3.3.1/CHANGELOG.md +146 -0
  3. evalgate_sdk-3.3.1/PKG-INFO +608 -0
  4. evalgate_sdk-3.3.1/README.md +556 -0
  5. evalgate_sdk-3.3.1/pyproject.toml +80 -0
  6. evalgate_sdk-3.3.1/src/evalgate_sdk/__init__.py +707 -0
  7. evalgate_sdk-3.3.1/src/evalgate_sdk/_version.py +3 -0
  8. evalgate_sdk-3.3.1/src/evalgate_sdk/assertions.py +1362 -0
  9. evalgate_sdk-3.3.1/src/evalgate_sdk/auto.py +247 -0
  10. evalgate_sdk-3.3.1/src/evalgate_sdk/batch.py +174 -0
  11. evalgate_sdk-3.3.1/src/evalgate_sdk/cache.py +111 -0
  12. evalgate_sdk-3.3.1/src/evalgate_sdk/ci_context.py +123 -0
  13. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/__init__.py +111 -0
  14. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/api.py +261 -0
  15. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/cli_constants.py +20 -0
  16. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/commands.py +1041 -0
  17. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/config.py +228 -0
  18. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/env.py +43 -0
  19. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/formatters/types.py +132 -0
  20. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/golden_commands.py +322 -0
  21. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/manifest.py +301 -0
  22. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/new_commands.py +435 -0
  23. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/policy_packs.py +103 -0
  24. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/profiles.py +12 -0
  25. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/regression_gate.py +312 -0
  26. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/render/__init__.py +1 -0
  27. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/render/snippet.py +18 -0
  28. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/render/sort.py +29 -0
  29. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/report/__init__.py +1 -0
  30. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/report/build_check_report.py +209 -0
  31. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/traces.py +186 -0
  32. evalgate_sdk-3.3.1/src/evalgate_sdk/cli/workspace.py +63 -0
  33. evalgate_sdk-3.3.1/src/evalgate_sdk/client.py +609 -0
  34. evalgate_sdk-3.3.1/src/evalgate_sdk/cluster.py +359 -0
  35. evalgate_sdk-3.3.1/src/evalgate_sdk/collector.py +161 -0
  36. evalgate_sdk-3.3.1/src/evalgate_sdk/constants.py +6 -0
  37. evalgate_sdk-3.3.1/src/evalgate_sdk/context.py +151 -0
  38. evalgate_sdk-3.3.1/src/evalgate_sdk/errors.py +236 -0
  39. evalgate_sdk-3.3.1/src/evalgate_sdk/export.py +238 -0
  40. evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/__init__.py +11 -0
  41. evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/github.py +51 -0
  42. evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/human.py +68 -0
  43. evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/json_fmt.py +11 -0
  44. evalgate_sdk-3.3.1/src/evalgate_sdk/formatters/pr_comment.py +80 -0
  45. evalgate_sdk-3.3.1/src/evalgate_sdk/golden.py +426 -0
  46. evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/__init__.py +1 -0
  47. evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/anthropic.py +99 -0
  48. evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/autogen.py +62 -0
  49. evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/crewai.py +61 -0
  50. evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/langchain.py +100 -0
  51. evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/openai.py +155 -0
  52. evalgate_sdk-3.3.1/src/evalgate_sdk/integrations/openai_eval.py +221 -0
  53. evalgate_sdk-3.3.1/src/evalgate_sdk/local.py +144 -0
  54. evalgate_sdk-3.3.1/src/evalgate_sdk/logger.py +123 -0
  55. evalgate_sdk-3.3.1/src/evalgate_sdk/matchers.py +62 -0
  56. evalgate_sdk-3.3.1/src/evalgate_sdk/otel.py +256 -0
  57. evalgate_sdk-3.3.1/src/evalgate_sdk/pagination.py +145 -0
  58. evalgate_sdk-3.3.1/src/evalgate_sdk/py.typed +0 -0
  59. evalgate_sdk-3.3.1/src/evalgate_sdk/pytest_plugin.py +96 -0
  60. evalgate_sdk-3.3.1/src/evalgate_sdk/reason_codes.py +103 -0
  61. evalgate_sdk-3.3.1/src/evalgate_sdk/regression.py +196 -0
  62. evalgate_sdk-3.3.1/src/evalgate_sdk/replay_decision.py +115 -0
  63. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/__init__.py +50 -0
  64. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/adapters/__init__.py +1 -0
  65. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
  66. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
  67. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/context.py +68 -0
  68. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/eval.py +318 -0
  69. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/execution_mode.py +170 -0
  70. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/executor.py +92 -0
  71. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/registry.py +125 -0
  72. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/run_report.py +249 -0
  73. evalgate_sdk-3.3.1/src/evalgate_sdk/runtime/types.py +143 -0
  74. evalgate_sdk-3.3.1/src/evalgate_sdk/snapshot.py +219 -0
  75. evalgate_sdk-3.3.1/src/evalgate_sdk/streaming.py +124 -0
  76. evalgate_sdk-3.3.1/src/evalgate_sdk/synthesize.py +226 -0
  77. evalgate_sdk-3.3.1/src/evalgate_sdk/testing.py +128 -0
  78. evalgate_sdk-3.3.1/src/evalgate_sdk/types.py +666 -0
  79. evalgate_sdk-3.3.1/src/evalgate_sdk/utils/__init__.py +1 -0
  80. evalgate_sdk-3.3.1/src/evalgate_sdk/utils/input_hash.py +42 -0
  81. evalgate_sdk-3.3.1/src/evalgate_sdk/workflows.py +264 -0
  82. evalgate_sdk-3.3.1/tests/__init__.py +0 -0
  83. evalgate_sdk-3.3.1/tests/test_assertions.py +245 -0
  84. evalgate_sdk-3.3.1/tests/test_assertions_async.py +210 -0
  85. evalgate_sdk-3.3.1/tests/test_ci_context.py +104 -0
  86. evalgate_sdk-3.3.1/tests/test_cli_golden_commands.py +232 -0
  87. evalgate_sdk-3.3.1/tests/test_cli_new_commands.py +211 -0
  88. evalgate_sdk-3.3.1/tests/test_client.py +164 -0
  89. evalgate_sdk-3.3.1/tests/test_collector.py +122 -0
  90. evalgate_sdk-3.3.1/tests/test_contract_payloads.py +218 -0
  91. evalgate_sdk-3.3.1/tests/test_decorators_and_integrations.py +411 -0
  92. evalgate_sdk-3.3.1/tests/test_errors.py +87 -0
  93. evalgate_sdk-3.3.1/tests/test_execution_mode.py +146 -0
  94. evalgate_sdk-3.3.1/tests/test_formatters.py +114 -0
  95. evalgate_sdk-3.3.1/tests/test_local_storage.py +85 -0
  96. evalgate_sdk-3.3.1/tests/test_new_modules.py +629 -0
  97. evalgate_sdk-3.3.1/tests/test_otel.py +112 -0
  98. evalgate_sdk-3.3.1/tests/test_parity_gap_modules.py +572 -0
  99. evalgate_sdk-3.3.1/tests/test_parity_gaps.py +438 -0
  100. evalgate_sdk-3.3.1/tests/test_prod_hardening.py +263 -0
  101. evalgate_sdk-3.3.1/tests/test_pytest_plugin.py +101 -0
  102. evalgate_sdk-3.3.1/tests/test_run_report.py +179 -0
  103. evalgate_sdk-3.3.1/tests/test_runtime_dsl.py +254 -0
  104. evalgate_sdk-3.3.1/tests/test_testing.py +100 -0
  105. evalgate_sdk-3.3.1/tests/test_types.py +71 -0
  106. evalgate_sdk-3.3.1/tests/test_workflows.py +171 -0
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ dist/
5
+ build/
6
+ *.egg-info/
7
+ .pytest_cache/
8
+ .mypy_cache/
9
+ .ruff_cache/
10
+ htmlcov/
11
+ coverage/
12
+ *.coverage
13
+ junit-results.xml
@@ -0,0 +1,146 @@
1
+ # Changelog
2
+
3
+ All notable changes to the EvalGate Python SDK will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ Version numbering is aligned with the TypeScript SDK (`@evalgate/sdk`) and the platform API.
9
+
10
+ **Version history note:** The Python SDK jumped from 1.0.x → 1.9.x → 2.0.0 to stay in sync with the TypeScript SDK, which had many releases (1.1–1.9) before the Python SDK existed. Both SDKs are now aligned on the same major.minor version.
11
+
12
+ ## [3.3.1] - 2026-03-24
13
+
14
+ ### Changed
15
+ - **PyPI distribution name** — package metadata publishes under `evalgate-sdk` on PyPI (import package remains `evalgate_sdk`).
16
+ - **Install and extras guidance** — CLI/runtime error messages, examples, and docs now consistently use `evalgate-sdk`, `evalgate-sdk[cli]`, `evalgate-sdk[openai]`, and `evalgate-sdk[anthropic]`.
17
+ - **Version alignment** — bumped Python SDK version and spec constants to `3.3.1` for synchronized release metadata.
18
+
19
+ ## [3.3.0] - 2026-03-23
20
+
21
+ ### Changed
22
+ - **Version bump** — Python SDK version and spec constants bumped to 3.3.0, aligned with TypeScript SDK and platform.
23
+
24
+ ## [3.2.7] - 2026-03-22
25
+
26
+ ### Changed
27
+
28
+ - **Release continuation** — Advanced the Python SDK release target to `3.2.7` to stay aligned with the coordinated npm and platform release after another day of shipped fixes.
29
+ - **Package metadata alignment** — Updated `pyproject.toml` and release validation inputs so the next PyPI publish uses the new version number consistently.
30
+
31
+ ### Fixed
32
+
33
+ - **Assertion contract normalization** — sync assertion helpers now consistently return `AssertionResult` with boolean truthiness, including schema, syntax, instruction-following, and required-field checks.
34
+ - **Schema and negation parity** — `matches_schema()` / `expect(...).to_match_json()` accept JSON strings and embedded JSON snippets, and `expect(...).not_` now mirrors the TypeScript fluent negation flow.
35
+ - **`run_assertions()` compatibility** — legacy boolean and mapping-returning assertion callables are coerced into `AssertionResult` so mixed assertion batches no longer fail at runtime.
36
+ - **Starter templates** — Python scaffolded eval examples now use `.passed` when converting assertion results into runtime `create_result()` payloads.
37
+ - **Async assertion configuration parity** — `configure_assertions()` now accepts keyword arguments like `provider=`, `api_key=`, `model=`, and `timeout_ms=` in addition to `AssertionLLMConfig(...)`.
38
+ - **Optional dependency guidance** — async assertion errors, docstrings, and README examples now point to the correct extras such as `pauly4010-evalgate-sdk[openai]` and `pauly4010-evalgate-sdk[anthropic]` (the distribution name at the time of this release; renamed to `evalgate-sdk` in 3.3.1).
39
+ - **PyPI package metadata** — author metadata now points to EvalGate organization branding instead of a personal handle.
40
+
41
+ ## [3.0.2] - 2026-03-09
42
+
43
+ ### Changed
44
+
45
+ - **Version alignment** — bumped to v3.0.2 to stay in sync with the platform API and the `@evalgate/sdk` TypeScript SDK
46
+ - **Autonomous workflow guidance** — clarified that the newest autonomous loop, clustering, synthesis, and daemon workflows currently ship in the TypeScript CLI; the Python SDK remains compatible with the shared EvalGate dataset, run-artifact, and CI flows while those advanced controls mature cross-SDK.
47
+
48
+ ---
49
+
50
+ ## [2.2.1] - 2026-03-03
51
+
52
+ ### Changed
53
+
54
+ - **Version alignment** — bumped to v2.2.1 to stay in sync with the `@evalgate/sdk` TypeScript SDK
55
+ - No Python-specific changes in this release; see [TypeScript SDK CHANGELOG](../../src/packages/sdk/CHANGELOG.md) for 2.2.1 details
56
+
57
+ ---
58
+
59
+ ## [2.2.0] - 2026-03-03
60
+
61
+ ### Changed
62
+
63
+ - **Version alignment** — bumped to v2.2.0 to stay in sync with the `@evalgate/sdk` TypeScript SDK
64
+ - No Python-specific changes in this release; see [TypeScript SDK CHANGELOG](../../src/packages/sdk/CHANGELOG.md) for full 2.2.0 details
65
+
66
+ ## [2.1.2] - 2026-03-02
67
+
68
+ ### Fixed
69
+
70
+ - **Type safety** — aligned with platform 2.1.2; all CI checks passing
71
+
72
+ ## [2.1.1] - 2026-03-02
73
+
74
+ ### Fixed
75
+
76
+ - **Contract payload validation** — Fixed ruff errors in `test_contract_payloads.py`
77
+ - **CI integration** — Resolved test suite compatibility issues
78
+ - **Linting compliance** — Fixed SIM102, E501, SIM105, I001, SIM300 ruff violations
79
+
80
+ ### Changed
81
+
82
+ - **Test coverage** — Improved test matrix for TypeScript/Python SDK compatibility
83
+ - **Documentation** — Updated README with PyPI downloads badge and GitHub stars
84
+
85
+ ## [2.0.0] - 2026-03-01
86
+
87
+ ### Breaking
88
+
89
+ - **Rebrand:** Package renamed `pauly4010-evalai-sdk` → `pauly4010-evalgate-sdk`, module `evalai_sdk` → `evalgate_sdk`
90
+ - **CLI:** `evalai` → `evalgate`
91
+ - **Config:** `.evalai/` → `.evalgate/` (legacy `.evalai/` still read, with deprecation warning)
92
+ - **Env vars:** `EVALAI_*` → `EVALGATE_*` (legacy `EVALAI_*` still work, with deprecation warning)
93
+ - **Error class:** `EvalAIError` → `EvalGateError`
94
+
95
+ ### Added
96
+
97
+ - Deprecation warnings when using legacy env vars or config paths
98
+
99
+ ## [1.9.1] - 2026-03-01
100
+
101
+ ### Fixed
102
+
103
+ - Align `SPEC_VERSION` with OpenAPI spec 1.9.1
104
+ - Ruff lint and format fixes (SIM102, E501, SIM105, I001, SIM300)
105
+
106
+ ### Changed
107
+
108
+ - Package metadata: Production/Stable status, improved description and keywords
109
+ - README: Added PyPI downloads badge, GitHub stars, status section, changelog link
110
+
111
+ ## [1.9.0] - 2026-02-27
112
+
113
+ ### Added
114
+
115
+ - Full parity with TypeScript SDK 1.9.0
116
+ - `evalai ci` CLI command — one-command CI loop
117
+ - Run artifact retention and diff system
118
+ - Impact analysis integration
119
+ - Schema versioning for run reports
120
+
121
+ ### Changed
122
+
123
+ - Exit codes standardized: 0=clean, 1=regressions, 2=config/infra
124
+ - CLI output improvements for CI environments
125
+
126
+ ## [1.0.1] - 2026-02-26
127
+
128
+ ### Fixed
129
+
130
+ - CLI init template and credential resolution
131
+ - Run output formatting
132
+
133
+ ## [1.0.0] - 2026-02-25
134
+
135
+ ### Added
136
+
137
+ - Initial Python SDK release
138
+ - `AIEvalClient` — async HTTP client for EvalGate API
139
+ - 20+ assertions: `expect()`, `to_contain`, `to_not_contain_pii`, `to_be_professional`, etc.
140
+ - Test suites: `create_test_suite`, `TestSuiteConfig`, `TestSuiteCase`
141
+ - Integrations: OpenAI, Anthropic, LangChain, CrewAI, AutoGen tracing
142
+ - Workflow tracing: `WorkflowTracer`, handoffs, cost tracking
143
+ - Regression gates: `evaluate_regression`, `to_pass_gate`
144
+ - CLI: `evalai init`, `evalai run`, `evalai gate`, `evalai ci`, `evalai doctor`
145
+ - Batch processing, caching, pagination, structured errors
146
+ - Full type hints (`py.typed`), mypy and Pyright compatible