evalgate-sdk 3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evalgate_sdk/__init__.py +707 -0
- evalgate_sdk/_version.py +3 -0
- evalgate_sdk/assertions.py +1362 -0
- evalgate_sdk/auto.py +247 -0
- evalgate_sdk/batch.py +174 -0
- evalgate_sdk/cache.py +111 -0
- evalgate_sdk/ci_context.py +123 -0
- evalgate_sdk/cli/__init__.py +111 -0
- evalgate_sdk/cli/api.py +261 -0
- evalgate_sdk/cli/cli_constants.py +20 -0
- evalgate_sdk/cli/commands.py +1041 -0
- evalgate_sdk/cli/config.py +228 -0
- evalgate_sdk/cli/env.py +43 -0
- evalgate_sdk/cli/formatters/types.py +132 -0
- evalgate_sdk/cli/golden_commands.py +322 -0
- evalgate_sdk/cli/manifest.py +301 -0
- evalgate_sdk/cli/new_commands.py +435 -0
- evalgate_sdk/cli/policy_packs.py +103 -0
- evalgate_sdk/cli/profiles.py +12 -0
- evalgate_sdk/cli/regression_gate.py +312 -0
- evalgate_sdk/cli/render/__init__.py +1 -0
- evalgate_sdk/cli/render/snippet.py +18 -0
- evalgate_sdk/cli/render/sort.py +29 -0
- evalgate_sdk/cli/report/__init__.py +1 -0
- evalgate_sdk/cli/report/build_check_report.py +209 -0
- evalgate_sdk/cli/traces.py +186 -0
- evalgate_sdk/cli/workspace.py +63 -0
- evalgate_sdk/client.py +609 -0
- evalgate_sdk/cluster.py +359 -0
- evalgate_sdk/collector.py +161 -0
- evalgate_sdk/constants.py +6 -0
- evalgate_sdk/context.py +151 -0
- evalgate_sdk/errors.py +236 -0
- evalgate_sdk/export.py +238 -0
- evalgate_sdk/formatters/__init__.py +11 -0
- evalgate_sdk/formatters/github.py +51 -0
- evalgate_sdk/formatters/human.py +68 -0
- evalgate_sdk/formatters/json_fmt.py +11 -0
- evalgate_sdk/formatters/pr_comment.py +80 -0
- evalgate_sdk/golden.py +426 -0
- evalgate_sdk/integrations/__init__.py +1 -0
- evalgate_sdk/integrations/anthropic.py +99 -0
- evalgate_sdk/integrations/autogen.py +62 -0
- evalgate_sdk/integrations/crewai.py +61 -0
- evalgate_sdk/integrations/langchain.py +100 -0
- evalgate_sdk/integrations/openai.py +155 -0
- evalgate_sdk/integrations/openai_eval.py +221 -0
- evalgate_sdk/local.py +144 -0
- evalgate_sdk/logger.py +123 -0
- evalgate_sdk/matchers.py +62 -0
- evalgate_sdk/otel.py +256 -0
- evalgate_sdk/pagination.py +145 -0
- evalgate_sdk/py.typed +0 -0
- evalgate_sdk/pytest_plugin.py +96 -0
- evalgate_sdk/reason_codes.py +103 -0
- evalgate_sdk/regression.py +196 -0
- evalgate_sdk/replay_decision.py +115 -0
- evalgate_sdk/runtime/__init__.py +50 -0
- evalgate_sdk/runtime/adapters/__init__.py +1 -0
- evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
- evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
- evalgate_sdk/runtime/context.py +68 -0
- evalgate_sdk/runtime/eval.py +318 -0
- evalgate_sdk/runtime/execution_mode.py +170 -0
- evalgate_sdk/runtime/executor.py +92 -0
- evalgate_sdk/runtime/registry.py +125 -0
- evalgate_sdk/runtime/run_report.py +249 -0
- evalgate_sdk/runtime/types.py +143 -0
- evalgate_sdk/snapshot.py +219 -0
- evalgate_sdk/streaming.py +124 -0
- evalgate_sdk/synthesize.py +226 -0
- evalgate_sdk/testing.py +128 -0
- evalgate_sdk/types.py +666 -0
- evalgate_sdk/utils/__init__.py +1 -0
- evalgate_sdk/utils/input_hash.py +42 -0
- evalgate_sdk/workflows.py +264 -0
- evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
- evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
- evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
- evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
evalgate_sdk/__init__.py
ADDED
|
@@ -0,0 +1,707 @@
|
|
|
1
|
+
"""EvalGate SDK — EvalGate client for Python."""
|
|
2
|
+
|
|
3
|
+
from evalgate_sdk._version import SDK_VERSION, SPEC_VERSION, __version__
|
|
4
|
+
from evalgate_sdk.assertions import (
|
|
5
|
+
AssertionLLMConfig,
|
|
6
|
+
AssertionResult,
|
|
7
|
+
Expectation,
|
|
8
|
+
configure_assertions,
|
|
9
|
+
contains_all_required_fields,
|
|
10
|
+
contains_json,
|
|
11
|
+
contains_keywords,
|
|
12
|
+
contains_language,
|
|
13
|
+
contains_language_async,
|
|
14
|
+
expect,
|
|
15
|
+
follows_instructions,
|
|
16
|
+
get_assertion_config,
|
|
17
|
+
has_consistency,
|
|
18
|
+
has_consistency_async,
|
|
19
|
+
has_factual_accuracy,
|
|
20
|
+
has_factual_accuracy_async,
|
|
21
|
+
has_length,
|
|
22
|
+
has_no_hallucinations,
|
|
23
|
+
has_no_hallucinations_async,
|
|
24
|
+
has_no_toxicity,
|
|
25
|
+
has_no_toxicity_async,
|
|
26
|
+
has_pii,
|
|
27
|
+
has_readability_score,
|
|
28
|
+
has_sentiment,
|
|
29
|
+
has_sentiment_async,
|
|
30
|
+
has_sentiment_with_score,
|
|
31
|
+
has_valid_code_syntax,
|
|
32
|
+
has_valid_code_syntax_async,
|
|
33
|
+
is_valid_email,
|
|
34
|
+
is_valid_url,
|
|
35
|
+
matches_pattern,
|
|
36
|
+
matches_schema,
|
|
37
|
+
not_contains_pii,
|
|
38
|
+
responded_within_duration,
|
|
39
|
+
responded_within_time,
|
|
40
|
+
responded_within_time_since,
|
|
41
|
+
run_assertions,
|
|
42
|
+
similar_to,
|
|
43
|
+
to_semantically_contain,
|
|
44
|
+
within_range,
|
|
45
|
+
)
|
|
46
|
+
from evalgate_sdk.auto import (
|
|
47
|
+
DEFAULT_AUTO_HISTORY_PATH,
|
|
48
|
+
DEFAULT_AUTO_REPORT_PATH,
|
|
49
|
+
AutoDiffSnapshot,
|
|
50
|
+
AutoIterationResult,
|
|
51
|
+
AutoOptions,
|
|
52
|
+
AutoPlanStep,
|
|
53
|
+
AutoReport,
|
|
54
|
+
append_auto_history,
|
|
55
|
+
build_auto_plan,
|
|
56
|
+
build_auto_report,
|
|
57
|
+
decide_auto_experiment,
|
|
58
|
+
format_auto_human,
|
|
59
|
+
run_auto_daemon,
|
|
60
|
+
write_auto_report,
|
|
61
|
+
)
|
|
62
|
+
from evalgate_sdk.batch import RequestBatcher, batch_process, can_batch
|
|
63
|
+
from evalgate_sdk.cache import CacheTTL, RequestCache, get_ttl, should_cache
|
|
64
|
+
from evalgate_sdk.ci_context import CIContext, detect_ci_context
|
|
65
|
+
from evalgate_sdk.cluster import ClusterCase, ClusterSample, ClusterSummary, TraceCluster, cluster_run_result, format_cluster_human
|
|
66
|
+
from evalgate_sdk.cli.api import (
|
|
67
|
+
FetchOptions,
|
|
68
|
+
PublishShareResult,
|
|
69
|
+
QualityLatestData,
|
|
70
|
+
RunDetailsData,
|
|
71
|
+
fetch_api,
|
|
72
|
+
fetch_quality_latest,
|
|
73
|
+
fetch_run_details,
|
|
74
|
+
fetch_run_export,
|
|
75
|
+
import_run_on_fail,
|
|
76
|
+
publish_share,
|
|
77
|
+
)
|
|
78
|
+
from evalgate_sdk.cli.cli_constants import EXIT
|
|
79
|
+
from evalgate_sdk.cli.config import (
|
|
80
|
+
EvalAIConfig,
|
|
81
|
+
EvalGateConfig,
|
|
82
|
+
find_config_path,
|
|
83
|
+
load_config,
|
|
84
|
+
merge_config_with_args,
|
|
85
|
+
)
|
|
86
|
+
from evalgate_sdk.cli.env import get_github_step_summary_path, is_ci, is_git_ref, is_github_actions
|
|
87
|
+
from evalgate_sdk.cli.formatters.types import (
|
|
88
|
+
CHECK_REPORT_SCHEMA_VERSION,
|
|
89
|
+
CheckReport,
|
|
90
|
+
FailedCase,
|
|
91
|
+
GateThresholds,
|
|
92
|
+
ScoreBreakdown01,
|
|
93
|
+
ScoreContribPts,
|
|
94
|
+
)
|
|
95
|
+
from evalgate_sdk.cli.manifest import (
|
|
96
|
+
EvaluationManifest,
|
|
97
|
+
SpecAnalysis,
|
|
98
|
+
generate_manifest,
|
|
99
|
+
read_lock,
|
|
100
|
+
read_manifest,
|
|
101
|
+
write_manifest,
|
|
102
|
+
)
|
|
103
|
+
from evalgate_sdk.cli.policy_packs import (
|
|
104
|
+
POLICY_PACKS,
|
|
105
|
+
PolicyPack,
|
|
106
|
+
get_valid_policy_versions,
|
|
107
|
+
resolve_policy_pack,
|
|
108
|
+
)
|
|
109
|
+
from evalgate_sdk.cli.regression_gate import (
|
|
110
|
+
BuiltinReport,
|
|
111
|
+
run_builtin_gate,
|
|
112
|
+
run_gate,
|
|
113
|
+
)
|
|
114
|
+
from evalgate_sdk.cli.regression_gate import (
|
|
115
|
+
format_github as format_gate_github,
|
|
116
|
+
)
|
|
117
|
+
from evalgate_sdk.cli.regression_gate import (
|
|
118
|
+
format_human as format_gate_human,
|
|
119
|
+
)
|
|
120
|
+
from evalgate_sdk.cli.render.snippet import truncate_snippet
|
|
121
|
+
from evalgate_sdk.cli.render.sort import sort_failed_cases
|
|
122
|
+
from evalgate_sdk.cli.report.build_check_report import build_check_report, compute_contrib_pts
|
|
123
|
+
from evalgate_sdk.cli.traces import (
|
|
124
|
+
RunTrace,
|
|
125
|
+
SpecTrace,
|
|
126
|
+
build_run_trace,
|
|
127
|
+
calculate_percentiles,
|
|
128
|
+
format_latency_table,
|
|
129
|
+
write_traces,
|
|
130
|
+
)
|
|
131
|
+
from evalgate_sdk.cli.workspace import EvalWorkspace, resolve_eval_workspace
|
|
132
|
+
from evalgate_sdk.client import AIEvalClient
|
|
133
|
+
from evalgate_sdk.collector import (
|
|
134
|
+
CollectorFeedbackInput,
|
|
135
|
+
CollectorSpanInput,
|
|
136
|
+
ReportTraceInput,
|
|
137
|
+
ReportTraceOptions,
|
|
138
|
+
ReportTraceResult,
|
|
139
|
+
report_trace,
|
|
140
|
+
)
|
|
141
|
+
from evalgate_sdk.constants import DEFAULT_BASE_URL
|
|
142
|
+
from evalgate_sdk.context import (
|
|
143
|
+
ContextMetadata,
|
|
144
|
+
EvalContext,
|
|
145
|
+
WithContext,
|
|
146
|
+
clone_context,
|
|
147
|
+
create_context,
|
|
148
|
+
get_current_context,
|
|
149
|
+
merge_contexts,
|
|
150
|
+
merge_with_context,
|
|
151
|
+
validate_context,
|
|
152
|
+
with_context,
|
|
153
|
+
with_context_sync,
|
|
154
|
+
)
|
|
155
|
+
from evalgate_sdk.errors import (
|
|
156
|
+
AuthenticationError,
|
|
157
|
+
EvalGateError,
|
|
158
|
+
NetworkError,
|
|
159
|
+
RateLimitError,
|
|
160
|
+
ValidationError,
|
|
161
|
+
create_error_from_response,
|
|
162
|
+
)
|
|
163
|
+
from evalgate_sdk.export import (
|
|
164
|
+
ExportData,
|
|
165
|
+
ExportFormat,
|
|
166
|
+
ExportOptions,
|
|
167
|
+
ImportOptions,
|
|
168
|
+
ImportResult,
|
|
169
|
+
convert_to_csv,
|
|
170
|
+
export_data,
|
|
171
|
+
export_to_file,
|
|
172
|
+
import_data,
|
|
173
|
+
import_from_file,
|
|
174
|
+
import_from_langsmith,
|
|
175
|
+
)
|
|
176
|
+
from evalgate_sdk.formatters import format_github, format_human, format_json, format_pr_comment
|
|
177
|
+
from evalgate_sdk.golden import (
|
|
178
|
+
DEFAULT_LABELED_DATASET_PATH,
|
|
179
|
+
DEFAULT_SYNTHETIC_DATASET_PATH,
|
|
180
|
+
AnalyzeSummary,
|
|
181
|
+
FailureModeSummary,
|
|
182
|
+
LabeledGoldenCase,
|
|
183
|
+
NormalizedRunArtifact,
|
|
184
|
+
NormalizedRunCase,
|
|
185
|
+
RunMetrics,
|
|
186
|
+
SyntheticGoldenCase,
|
|
187
|
+
analyze_labeled_dataset,
|
|
188
|
+
extract_run_metrics,
|
|
189
|
+
format_analyze_human,
|
|
190
|
+
normalize_run_artifact,
|
|
191
|
+
parse_labeled_dataset,
|
|
192
|
+
write_jsonl,
|
|
193
|
+
)
|
|
194
|
+
from evalgate_sdk.integrations.anthropic import trace_anthropic, trace_anthropic_call
|
|
195
|
+
from evalgate_sdk.integrations.autogen import trace_autogen
|
|
196
|
+
from evalgate_sdk.integrations.crewai import trace_crewai
|
|
197
|
+
from evalgate_sdk.integrations.langchain import trace_langchain
|
|
198
|
+
from evalgate_sdk.integrations.openai import trace_openai, trace_openai_call
|
|
199
|
+
from evalgate_sdk.integrations.openai_eval import (
|
|
200
|
+
OpenAIChatEvalCase,
|
|
201
|
+
OpenAIChatEvalCaseResult,
|
|
202
|
+
OpenAIChatEvalResult,
|
|
203
|
+
openai_chat_eval,
|
|
204
|
+
)
|
|
205
|
+
from evalgate_sdk.local import LocalStorage, LocalStorageStats
|
|
206
|
+
from evalgate_sdk.logger import Logger, RequestLogger, create_logger, get_logger, set_logger
|
|
207
|
+
from evalgate_sdk.matchers import GateAssertionError, assert_passes_gate, to_pass_gate
|
|
208
|
+
from evalgate_sdk.otel import OTelExporter, OTelExporterOptions, OTelExportPayload, create_otel_exporter
|
|
209
|
+
from evalgate_sdk.pagination import (
|
|
210
|
+
PaginatedIterator,
|
|
211
|
+
PaginatedResponse,
|
|
212
|
+
auto_paginate,
|
|
213
|
+
create_paginated_iterator,
|
|
214
|
+
create_pagination_meta,
|
|
215
|
+
decode_cursor,
|
|
216
|
+
encode_cursor,
|
|
217
|
+
parse_pagination_params,
|
|
218
|
+
)
|
|
219
|
+
from evalgate_sdk.pytest_plugin import (
|
|
220
|
+
assert_all_assertions_passed,
|
|
221
|
+
assert_no_errors,
|
|
222
|
+
assert_score_above,
|
|
223
|
+
assert_score_between,
|
|
224
|
+
)
|
|
225
|
+
from evalgate_sdk.reason_codes import REASON_CODES, get_reason_info, is_blocking
|
|
226
|
+
from evalgate_sdk.replay_decision import NormalizedBudgetConfig, ReplayDecision, determine_comparison_basis, evaluate_replay_outcome
|
|
227
|
+
from evalgate_sdk.regression import (
|
|
228
|
+
ARTIFACTS,
|
|
229
|
+
GATE_CATEGORY,
|
|
230
|
+
GATE_EXIT,
|
|
231
|
+
REPORT_SCHEMA_VERSION,
|
|
232
|
+
Baseline,
|
|
233
|
+
BaselineTolerance,
|
|
234
|
+
RegressionDelta,
|
|
235
|
+
RegressionReport,
|
|
236
|
+
compute_baseline_checksum,
|
|
237
|
+
evaluate_regression,
|
|
238
|
+
verify_baseline_checksum,
|
|
239
|
+
)
|
|
240
|
+
from evalgate_sdk.runtime import (
|
|
241
|
+
EvalExecutionError,
|
|
242
|
+
EvalRuntimeError,
|
|
243
|
+
EvalSDKRuntimeError,
|
|
244
|
+
SpecExecutionError,
|
|
245
|
+
SpecRegistrationError,
|
|
246
|
+
)
|
|
247
|
+
from evalgate_sdk.runtime.adapters.config_to_dsl import (
|
|
248
|
+
MigrationResult,
|
|
249
|
+
migrate_config_to_dsl,
|
|
250
|
+
migrate_project_to_dsl,
|
|
251
|
+
migrate_testsuite_to_dsl,
|
|
252
|
+
)
|
|
253
|
+
from evalgate_sdk.runtime.adapters.testsuite_to_dsl import (
|
|
254
|
+
TestDefinition as LegacyTestDefinition,
|
|
255
|
+
)
|
|
256
|
+
from evalgate_sdk.runtime.adapters.testsuite_to_dsl import (
|
|
257
|
+
TestSuiteAdapterOptions,
|
|
258
|
+
adapt_test_suite,
|
|
259
|
+
generate_define_eval_code,
|
|
260
|
+
)
|
|
261
|
+
from evalgate_sdk.runtime.context import (
|
|
262
|
+
clone_runtime_context,
|
|
263
|
+
create_runtime_context,
|
|
264
|
+
merge_runtime_contexts,
|
|
265
|
+
validate_runtime_context,
|
|
266
|
+
)
|
|
267
|
+
from evalgate_sdk.runtime.eval import (
|
|
268
|
+
create_result,
|
|
269
|
+
define_eval,
|
|
270
|
+
define_eval_only,
|
|
271
|
+
define_eval_skip,
|
|
272
|
+
define_suite,
|
|
273
|
+
evalai,
|
|
274
|
+
from_dataset,
|
|
275
|
+
get_filtered_specs,
|
|
276
|
+
)
|
|
277
|
+
from evalgate_sdk.runtime.execution_mode import (
|
|
278
|
+
ExecutionModeConfig,
|
|
279
|
+
get_execution_mode,
|
|
280
|
+
validate_execution_mode,
|
|
281
|
+
)
|
|
282
|
+
from evalgate_sdk.runtime.executor import (
|
|
283
|
+
LocalExecutor,
|
|
284
|
+
create_local_executor,
|
|
285
|
+
default_local_executor,
|
|
286
|
+
)
|
|
287
|
+
from evalgate_sdk.runtime.registry import (
|
|
288
|
+
create_eval_runtime,
|
|
289
|
+
dispose_active_runtime,
|
|
290
|
+
get_active_runtime,
|
|
291
|
+
set_active_runtime,
|
|
292
|
+
with_runtime,
|
|
293
|
+
)
|
|
294
|
+
from evalgate_sdk.runtime.run_report import (
|
|
295
|
+
RunReport,
|
|
296
|
+
RunReportBuilder,
|
|
297
|
+
RunResult,
|
|
298
|
+
RunSummary,
|
|
299
|
+
create_run_report,
|
|
300
|
+
parse_run_report,
|
|
301
|
+
)
|
|
302
|
+
from evalgate_sdk.runtime.types import (
|
|
303
|
+
DependsOn,
|
|
304
|
+
EvalResult,
|
|
305
|
+
EvalSpec,
|
|
306
|
+
ExecutorCapabilities,
|
|
307
|
+
SpecConfig,
|
|
308
|
+
SpecOptions,
|
|
309
|
+
)
|
|
310
|
+
from evalgate_sdk.snapshot import (
|
|
311
|
+
SnapshotComparison,
|
|
312
|
+
SnapshotData,
|
|
313
|
+
SnapshotManager,
|
|
314
|
+
SnapshotMetadata,
|
|
315
|
+
compare_with_snapshot,
|
|
316
|
+
delete_snapshot,
|
|
317
|
+
list_snapshots,
|
|
318
|
+
load_snapshot,
|
|
319
|
+
snapshot,
|
|
320
|
+
)
|
|
321
|
+
from evalgate_sdk.streaming import (
|
|
322
|
+
BatchProgress,
|
|
323
|
+
BatchResult,
|
|
324
|
+
RateLimiter,
|
|
325
|
+
batch_read,
|
|
326
|
+
chunk,
|
|
327
|
+
stream_evaluation,
|
|
328
|
+
)
|
|
329
|
+
from evalgate_sdk.synthesize import (
|
|
330
|
+
DimensionMatrix,
|
|
331
|
+
SynthesizeSummary,
|
|
332
|
+
format_synthesize_human,
|
|
333
|
+
parse_dimension_matrix,
|
|
334
|
+
synthesize_labeled_dataset,
|
|
335
|
+
)
|
|
336
|
+
from evalgate_sdk.testing import TestSuite, create_test_suite
|
|
337
|
+
from evalgate_sdk.types import CamelModel, QualityBreakdown, QualityScore
|
|
338
|
+
from evalgate_sdk.utils.input_hash import normalize_input, sha256_input
|
|
339
|
+
from evalgate_sdk.workflows import WorkflowTracer, create_workflow_tracer, trace_workflow_step
|
|
340
|
+
|
|
341
|
+
# Public API of the package root: every name listed here is bound by one of
# the ``from evalgate_sdk... import`` statements above and is what
# ``from evalgate_sdk import *`` exposes. Each name appears exactly once,
# grouped by the area it belongs to.
__all__ = [
    # Version
    "__version__",
    "SDK_VERSION",
    "SPEC_VERSION",
    # Client
    "AIEvalClient",
    # Errors
    "EvalGateError",
    "RateLimitError",
    "AuthenticationError",
    "NetworkError",
    "ValidationError",
    "create_error_from_response",
    # Assertions
    "expect",
    "Expectation",
    "AssertionResult",
    "AssertionLLMConfig",
    # Default artifact paths (imported from evalgate_sdk.auto and
    # evalgate_sdk.golden, not from the assertions module)
    "DEFAULT_AUTO_HISTORY_PATH",
    "DEFAULT_AUTO_REPORT_PATH",
    "DEFAULT_LABELED_DATASET_PATH",
    "DEFAULT_SYNTHETIC_DATASET_PATH",
    # Assertions (continued)
    "run_assertions",
    "contains_keywords",
    "matches_pattern",
    "has_length",
    "has_sentiment",
    "has_sentiment_async",
    "has_sentiment_with_score",
    "similar_to",
    "within_range",
    "is_valid_email",
    "is_valid_url",
    "not_contains_pii",
    "has_pii",
    "has_no_hallucinations",
    "has_no_hallucinations_async",
    "matches_schema",
    "contains_json",
    "contains_language",
    "contains_language_async",
    "has_readability_score",
    "has_factual_accuracy",
    "has_factual_accuracy_async",
    "responded_within_time",
    "responded_within_duration",
    "responded_within_time_since",
    "has_no_toxicity",
    "has_no_toxicity_async",
    "has_valid_code_syntax",
    "has_valid_code_syntax_async",
    "has_consistency",
    "has_consistency_async",
    "to_semantically_contain",
    "configure_assertions",
    "get_assertion_config",
    "follows_instructions",
    "contains_all_required_fields",
    # Testing
    "TestSuite",
    "create_test_suite",
    # Workflows
    "WorkflowTracer",
    "create_workflow_tracer",
    "trace_workflow_step",
    # Context
    "ContextMetadata",
    "EvalContext",
    "WithContext",
    "create_context",
    "get_current_context",
    "merge_with_context",
    "with_context",
    "with_context_sync",
    "clone_context",
    "merge_contexts",
    "validate_context",
    # Logger
    "Logger",
    "RequestLogger",
    "create_logger",
    "get_logger",
    "set_logger",
    # Pagination
    "PaginatedIterator",
    "PaginatedResponse",
    "auto_paginate",
    "create_paginated_iterator",
    "create_pagination_meta",
    "encode_cursor",
    "decode_cursor",
    "parse_pagination_params",
    # Batch
    "RequestBatcher",
    "batch_process",
    "can_batch",
    # Cache
    "CacheTTL",
    "RequestCache",
    "should_cache",
    "get_ttl",
    # Golden datasets / autonomous workflows
    # -- evalgate_sdk.golden
    "LabeledGoldenCase",
    "SyntheticGoldenCase",
    "FailureModeSummary",
    "AnalyzeSummary",
    "NormalizedRunCase",
    "NormalizedRunArtifact",
    "RunMetrics",
    "parse_labeled_dataset",
    "analyze_labeled_dataset",
    "format_analyze_human",
    "write_jsonl",
    "extract_run_metrics",
    "normalize_run_artifact",
    # -- evalgate_sdk.cluster
    "ClusterSample",
    "ClusterCase",
    "TraceCluster",
    "ClusterSummary",
    "cluster_run_result",
    "format_cluster_human",
    # -- evalgate_sdk.synthesize
    "SynthesizeSummary",
    "DimensionMatrix",
    "parse_dimension_matrix",
    "synthesize_labeled_dataset",
    "format_synthesize_human",
    # -- evalgate_sdk.replay_decision
    "NormalizedBudgetConfig",
    "ReplayDecision",
    "determine_comparison_basis",
    "evaluate_replay_outcome",
    # -- evalgate_sdk.auto
    "AutoPlanStep",
    "AutoOptions",
    "AutoDiffSnapshot",
    "AutoIterationResult",
    "AutoReport",
    "build_auto_plan",
    "decide_auto_experiment",
    "build_auto_report",
    "format_auto_human",
    "append_auto_history",
    "write_auto_report",
    "run_auto_daemon",
    # Streaming
    "RateLimiter",
    "BatchProgress",
    "BatchResult",
    "stream_evaluation",
    "batch_read",
    "chunk",
    # Regression
    "GATE_EXIT",
    "GATE_CATEGORY",
    "REPORT_SCHEMA_VERSION",
    "ARTIFACTS",
    "Baseline",
    "BaselineTolerance",
    "RegressionDelta",
    "RegressionReport",
    "evaluate_regression",
    "compute_baseline_checksum",
    "verify_baseline_checksum",
    # Snapshot
    "SnapshotManager",
    "SnapshotData",
    "SnapshotMetadata",
    "SnapshotComparison",
    "snapshot",
    "load_snapshot",
    "compare_with_snapshot",
    "delete_snapshot",
    "list_snapshots",
    # Export/Import
    "ExportData",
    "ExportFormat",
    "ExportOptions",
    "ImportOptions",
    "ImportResult",
    "export_data",
    "import_data",
    "export_to_file",
    "import_from_file",
    "import_from_langsmith",
    "convert_to_csv",
    # Matchers
    "to_pass_gate",
    "assert_passes_gate",
    "GateAssertionError",
    # OpenAI integration
    "trace_openai",
    "trace_openai_call",
    "openai_chat_eval",
    "OpenAIChatEvalCase",
    "OpenAIChatEvalCaseResult",
    "OpenAIChatEvalResult",
    # Anthropic integration
    "trace_anthropic",
    "trace_anthropic_call",
    # Framework integrations
    "trace_langchain",
    "trace_crewai",
    "trace_autogen",
    # Runtime DSL
    "define_eval",
    "define_eval_skip",
    "define_eval_only",
    "define_suite",
    "create_result",
    "evalai",
    "from_dataset",
    "get_filtered_specs",
    # Runtime management
    "create_eval_runtime",
    "get_active_runtime",
    "set_active_runtime",
    "dispose_active_runtime",
    "with_runtime",
    # Runtime execution
    "LocalExecutor",
    "create_local_executor",
    "default_local_executor",
    # Runtime types
    "EvalSpec",
    "EvalResult",
    "SpecConfig",
    "SpecOptions",
    "ExecutorCapabilities",
    "DependsOn",
    # Runtime errors
    "EvalRuntimeError",
    "SpecRegistrationError",
    "SpecExecutionError",
    "EvalSDKRuntimeError",
    "EvalExecutionError",
    # Types
    "CamelModel",
    "QualityScore",
    "QualityBreakdown",
    # Collector (T2)
    "report_trace",
    "ReportTraceInput",
    "ReportTraceOptions",
    "ReportTraceResult",
    "CollectorSpanInput",
    "CollectorFeedbackInput",
    # RunReport (T4)
    "RunReport",
    "RunReportBuilder",
    "RunResult",
    "RunSummary",
    "create_run_report",
    "parse_run_report",
    # OTel (T6)
    "OTelExporter",
    "OTelExporterOptions",
    "OTelExportPayload",
    "create_otel_exporter",
    # Local storage (T7)
    "LocalStorage",
    "LocalStorageStats",
    # Execution mode (T8)
    "ExecutionModeConfig",
    "get_execution_mode",
    "validate_execution_mode",
    # Pytest plugin (T9)
    # NOTE: "assert_passes_gate" is deliberately not repeated here; it is
    # imported from evalgate_sdk.matchers and exported once under "Matchers".
    "assert_score_above",
    "assert_score_between",
    "assert_no_errors",
    "assert_all_assertions_passed",
    # CI context (T10)
    "CIContext",
    "detect_ci_context",
    # Reason codes (T10)
    "REASON_CODES",
    "get_reason_info",
    "is_blocking",
    # Formatters (T10)
    "format_human",
    "format_json",
    "format_github",
    "format_pr_comment",
    # Constants (T12)
    "DEFAULT_BASE_URL",
    # Utils (T12)
    "normalize_input",
    "sha256_input",
    # Runtime context (T13)
    "create_runtime_context",
    "merge_runtime_contexts",
    "clone_runtime_context",
    "validate_runtime_context",
    # Runtime adapters (T13)
    "MigrationResult",
    "migrate_config_to_dsl",
    "migrate_project_to_dsl",
    "migrate_testsuite_to_dsl",
    "LegacyTestDefinition",
    "TestSuiteAdapterOptions",
    "adapt_test_suite",
    "generate_define_eval_code",
    # CLI constants (T14)
    "EXIT",
    # CLI env (T14)
    "is_ci",
    "is_github_actions",
    "is_git_ref",
    "get_github_step_summary_path",
    # CLI config (T14)
    "EvalAIConfig",
    "EvalGateConfig",
    "find_config_path",
    "load_config",
    "merge_config_with_args",
    # CLI API (T14)
    "fetch_api",
    "fetch_quality_latest",
    "fetch_run_details",
    "fetch_run_export",
    "import_run_on_fail",
    "publish_share",
    "FetchOptions",
    "QualityLatestData",
    "RunDetailsData",
    "PublishShareResult",
    # CLI formatter types (T15)
    "CHECK_REPORT_SCHEMA_VERSION",
    "CheckReport",
    "FailedCase",
    "GateThresholds",
    "ScoreBreakdown01",
    "ScoreContribPts",
    # CLI manifest (T15)
    "EvaluationManifest",
    "SpecAnalysis",
    "generate_manifest",
    "read_manifest",
    "write_manifest",
    "read_lock",
    # CLI policy packs (T15)
    "POLICY_PACKS",
    "PolicyPack",
    "get_valid_policy_versions",
    "resolve_policy_pack",
    # CLI regression gate (T15)
    # format_gate_human / format_gate_github are the aliased
    # regression_gate.format_human / format_github, renamed on import so
    # they do not clash with the evalgate_sdk.formatters functions above.
    "BuiltinReport",
    "run_builtin_gate",
    "run_gate",
    "format_gate_human",
    "format_gate_github",
    # CLI traces (T15)
    "RunTrace",
    "SpecTrace",
    "build_run_trace",
    "calculate_percentiles",
    "format_latency_table",
    "write_traces",
    # CLI render (T15)
    "truncate_snippet",
    "sort_failed_cases",
    # CLI report (T15)
    "build_check_report",
    "compute_contrib_pts",
    # CLI workspace (T15)
    "EvalWorkspace",
    "resolve_eval_workspace",
]
|
evalgate_sdk/_version.py
ADDED