evalgate-sdk 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. evalgate_sdk/__init__.py +707 -0
  2. evalgate_sdk/_version.py +3 -0
  3. evalgate_sdk/assertions.py +1362 -0
  4. evalgate_sdk/auto.py +247 -0
  5. evalgate_sdk/batch.py +174 -0
  6. evalgate_sdk/cache.py +111 -0
  7. evalgate_sdk/ci_context.py +123 -0
  8. evalgate_sdk/cli/__init__.py +111 -0
  9. evalgate_sdk/cli/api.py +261 -0
  10. evalgate_sdk/cli/cli_constants.py +20 -0
  11. evalgate_sdk/cli/commands.py +1041 -0
  12. evalgate_sdk/cli/config.py +228 -0
  13. evalgate_sdk/cli/env.py +43 -0
  14. evalgate_sdk/cli/formatters/types.py +132 -0
  15. evalgate_sdk/cli/golden_commands.py +322 -0
  16. evalgate_sdk/cli/manifest.py +301 -0
  17. evalgate_sdk/cli/new_commands.py +435 -0
  18. evalgate_sdk/cli/policy_packs.py +103 -0
  19. evalgate_sdk/cli/profiles.py +12 -0
  20. evalgate_sdk/cli/regression_gate.py +312 -0
  21. evalgate_sdk/cli/render/__init__.py +1 -0
  22. evalgate_sdk/cli/render/snippet.py +18 -0
  23. evalgate_sdk/cli/render/sort.py +29 -0
  24. evalgate_sdk/cli/report/__init__.py +1 -0
  25. evalgate_sdk/cli/report/build_check_report.py +209 -0
  26. evalgate_sdk/cli/traces.py +186 -0
  27. evalgate_sdk/cli/workspace.py +63 -0
  28. evalgate_sdk/client.py +609 -0
  29. evalgate_sdk/cluster.py +359 -0
  30. evalgate_sdk/collector.py +161 -0
  31. evalgate_sdk/constants.py +6 -0
  32. evalgate_sdk/context.py +151 -0
  33. evalgate_sdk/errors.py +236 -0
  34. evalgate_sdk/export.py +238 -0
  35. evalgate_sdk/formatters/__init__.py +11 -0
  36. evalgate_sdk/formatters/github.py +51 -0
  37. evalgate_sdk/formatters/human.py +68 -0
  38. evalgate_sdk/formatters/json_fmt.py +11 -0
  39. evalgate_sdk/formatters/pr_comment.py +80 -0
  40. evalgate_sdk/golden.py +426 -0
  41. evalgate_sdk/integrations/__init__.py +1 -0
  42. evalgate_sdk/integrations/anthropic.py +99 -0
  43. evalgate_sdk/integrations/autogen.py +62 -0
  44. evalgate_sdk/integrations/crewai.py +61 -0
  45. evalgate_sdk/integrations/langchain.py +100 -0
  46. evalgate_sdk/integrations/openai.py +155 -0
  47. evalgate_sdk/integrations/openai_eval.py +221 -0
  48. evalgate_sdk/local.py +144 -0
  49. evalgate_sdk/logger.py +123 -0
  50. evalgate_sdk/matchers.py +62 -0
  51. evalgate_sdk/otel.py +256 -0
  52. evalgate_sdk/pagination.py +145 -0
  53. evalgate_sdk/py.typed +0 -0
  54. evalgate_sdk/pytest_plugin.py +96 -0
  55. evalgate_sdk/reason_codes.py +103 -0
  56. evalgate_sdk/regression.py +196 -0
  57. evalgate_sdk/replay_decision.py +115 -0
  58. evalgate_sdk/runtime/__init__.py +50 -0
  59. evalgate_sdk/runtime/adapters/__init__.py +1 -0
  60. evalgate_sdk/runtime/adapters/config_to_dsl.py +270 -0
  61. evalgate_sdk/runtime/adapters/testsuite_to_dsl.py +213 -0
  62. evalgate_sdk/runtime/context.py +68 -0
  63. evalgate_sdk/runtime/eval.py +318 -0
  64. evalgate_sdk/runtime/execution_mode.py +170 -0
  65. evalgate_sdk/runtime/executor.py +92 -0
  66. evalgate_sdk/runtime/registry.py +125 -0
  67. evalgate_sdk/runtime/run_report.py +249 -0
  68. evalgate_sdk/runtime/types.py +143 -0
  69. evalgate_sdk/snapshot.py +219 -0
  70. evalgate_sdk/streaming.py +124 -0
  71. evalgate_sdk/synthesize.py +226 -0
  72. evalgate_sdk/testing.py +128 -0
  73. evalgate_sdk/types.py +666 -0
  74. evalgate_sdk/utils/__init__.py +1 -0
  75. evalgate_sdk/utils/input_hash.py +42 -0
  76. evalgate_sdk/workflows.py +264 -0
  77. evalgate_sdk-3.3.1.dist-info/METADATA +608 -0
  78. evalgate_sdk-3.3.1.dist-info/RECORD +80 -0
  79. evalgate_sdk-3.3.1.dist-info/WHEEL +4 -0
  80. evalgate_sdk-3.3.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,435 @@
1
+ """New CLI commands for Python SDK parity with TypeScript SDK (T5).
2
+
3
+ Commands: start, watch, compare, validate, promote, replay.
4
+ Supporting: templates, profiles, formatters.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import contextlib
10
+ import json
11
+ import os
12
+ import time
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import typer
17
+ from rich.console import Console
18
+ from rich.table import Table
19
+
20
# Shared rich console; every command in this module prints through it.
console = Console()

# Typer parameter objects are built once at module level and reused as
# function defaults, which keeps calls out of default expressions (B008).
FILES_ARG = typer.Argument(
    ...,
    help="Two or more result JSON files to compare",
)
24
+
25
+
26
+ # ── Templates ─────────────────────────────────────────────────────────
27
+
28
# One-line, human-readable summary for each starter template name.
TEMPLATE_DESCRIPTIONS = dict(
    chatbot="Conversational AI — tone, helpfulness, safety",
    codegen="Code generation — syntax, correctness, style",
    agent="Multi-step agent — tool use, reasoning, outcomes",
    safety="Safety guards — PII, toxicity, hallucination",
    rag="RAG pipeline — retrieval faithfulness, grounding",
)
35
+
36
# Starter templates: template name -> {project-relative path -> file content}.
# Each content string is written verbatim to disk by ``_install_template``, so
# it must be valid Python source on its own; the function bodies inside the
# strings are therefore indented with four spaces.
TEMPLATES: dict[str, dict[str, str]] = {
    "chatbot": {
        "eval/chatbot_quality.py": '''"""Chatbot quality evaluation."""
from evalgate_sdk.runtime.eval import define_eval, create_result
from evalgate_sdk.assertions import expect

define_eval("chatbot-responds-helpfully", lambda ctx: _eval_helpful(ctx))

async def _eval_helpful(ctx):
    response = "I'd be happy to help you with that! Here's what I suggest..."
    helpful = expect(response).to_contain_keywords(["help", "suggest"])
    length = expect(response).to_have_length(min=20, max=500)
    all_passed = helpful.passed and length.passed
    return create_result(passed=all_passed, score=100 if all_passed else 40, output=response)
''',
    },
    "codegen": {
        "eval/codegen_accuracy.py": '''"""Code generation accuracy evaluation."""
from evalgate_sdk.runtime.eval import define_eval, create_result
from evalgate_sdk.assertions import has_valid_code_syntax

define_eval("codegen-produces-valid-python", lambda ctx: _eval_codegen(ctx))

async def _eval_codegen(ctx):
    code = "def hello():\\n    return \'Hello, World!\'"
    valid = has_valid_code_syntax(code, "python")
    return create_result(passed=valid.passed, score=100 if valid.passed else 0, output=code)
''',
    },
    "agent": {
        "eval/agent_tool_use.py": '''"""Agent tool-use evaluation."""
from evalgate_sdk.runtime.eval import define_eval, create_result
from evalgate_sdk.assertions import contains_keywords

define_eval("agent-uses-tools-correctly", lambda ctx: _eval_agent(ctx))

async def _eval_agent(ctx):
    output = "I used the search tool to find: The weather is sunny."
    used_tool = contains_keywords(output, ["search", "tool"])
    return create_result(passed=used_tool.passed, score=100 if used_tool.passed else 0, output=output)
''',
    },
    "safety": {
        "eval/safety_checks.py": '''"""Safety guard evaluation."""
from evalgate_sdk.runtime.eval import define_eval, create_result
from evalgate_sdk.assertions import expect

define_eval("no-pii-leak", lambda ctx: _eval_no_pii(ctx))

async def _eval_no_pii(ctx):
    response = "I can help you find information about that topic safely."
    no_pii = expect(response).to_not_contain_pii()
    professional = expect(response).to_be_professional()
    all_passed = no_pii.passed and professional.passed
    return create_result(passed=all_passed, score=100 if all_passed else 0)
''',
    },
    "rag": {
        "eval/rag_faithfulness.py": '''"""RAG faithfulness evaluation."""
from evalgate_sdk.runtime.eval import define_eval, create_result
from evalgate_sdk.assertions import has_no_hallucinations

define_eval("rag-grounded-response", lambda ctx: _eval_rag(ctx))

async def _eval_rag(ctx):
    context_docs = ["Paris is the capital of France."]
    response = "The capital of France is Paris."
    grounded = has_no_hallucinations(response, context_docs)
    return create_result(passed=grounded.passed, score=100 if grounded.passed else 0, output=response)
''',
    },
}
108
+
109
+
110
def _install_template(template: str, project_root: str) -> int:
    """Write the files of a starter template below *project_root*.

    A template name that is not in ``TEMPLATES`` installs nothing. Parent
    directories are created as needed, and a file that already exists is
    left untouched.

    Returns:
        The number of files that were newly written.
    """
    root = Path(project_root)
    written = 0
    for relative, source in TEMPLATES.get(template, {}).items():
        target = root / relative
        target.parent.mkdir(parents=True, exist_ok=True)
        if target.exists():
            # Keep whatever is already on disk; only missing files are created.
            continue
        target.write_text(source, encoding="utf-8")
        written += 1
    return written
121
+
122
+
123
+ # ── start ─────────────────────────────────────────────────────────────
124
+
125
+
126
def start(
    format: str = typer.Option("human", "--format", "-f", help="Output format: human or json"),
    skip_init: bool = typer.Option(False, "--skip-init", help="Skip init if not set up"),
    template: str = typer.Option("", "--template", "-t", help="Starter template to install"),
) -> None:
    """Zero-config startup: one command → init → discover → run."""
    # NOTE: the docstring is kept verbatim; it presumably doubles as the CLI
    # help text of this Typer command.
    #
    # Flow: (1) write a default .evalgate/config.json when none exists and
    # --skip-init is not given, (1b) optionally install a starter template,
    # (2) discover spec files. Progress messages are printed only for the
    # "human" format. Exits with status 1 for an unknown template and with
    # status 0 when no spec files are found.
    human = format == "human"
    project_root = os.getcwd()
    root = Path(project_root)

    if human:
        console.print("\n[bold cyan]🚀 evalgate start — zero-config evaluation run[/bold cyan]\n")

    # Step 1: make sure a config file exists.
    config_path = root / ".evalgate" / "config.json"
    if not (config_path.exists() or skip_init):
        if human:
            console.print("[yellow]📦 No config found. Initializing...[/yellow]")
        default_config = {
            "version": 1,
            "project_name": root.name,
            "eval_dir": "eval",
            "baseline": ".evalgate/baseline.json",
        }
        config_path.parent.mkdir(parents=True, exist_ok=True)
        config_path.write_text(json.dumps(default_config, indent=2))
        if human:
            console.print("[green]✓ Initialized .evalgate/config.json[/green]")

    # Step 1b: install the requested starter template, if any.
    if template:
        if template not in TEMPLATES:
            # This error is printed for every output format.
            console.print(f"[red]Unknown template: {template}[/red]")
            console.print(f"Available: {', '.join(TEMPLATES.keys())}")
            raise typer.Exit(1)
        installed = _install_template(template, project_root)
        if human:
            console.print(f"[green]✓ Installed {template} template ({installed} file(s))[/green]")

    # Step 2: discover spec files.
    if human:
        console.print("\n[cyan]🔍 Discovering specs...[/cyan]")

    # Imported lazily, only once discovery is actually reached.
    from evalgate_sdk.runtime.execution_mode import get_execution_mode

    mode_config = get_execution_mode(project_root)
    spec_count = len(mode_config.spec_files)

    if human:
        console.print(f"[dim]Found {spec_count} spec file(s) in {mode_config.mode} mode[/dim]")

    if spec_count == 0:
        if human:
            console.print(
                "[yellow]No spec files found. Create eval files with define_eval() or use --template.[/yellow]"
            )
        raise typer.Exit(0)

    if human:
        console.print("\n[green]✓ Ready to run evaluations[/green]")
        console.print("[dim]Use 'evalgate run' to execute specs[/dim]")
189
+
190
+
191
+ # ── watch ─────────────────────────────────────────────────────────────
192
+
193
+
194
def watch(
    eval_dir: str = typer.Option("eval", "--eval-dir", "-e", help="Directory to watch"),
    debounce_ms: int = typer.Option(300, "--debounce", help="Debounce interval in ms"),
    clear_screen: bool = typer.Option(True, "--clear/--no-clear", help="Clear screen between runs"),
) -> None:
    """Watch mode — re-run evaluations when source files change."""
    # NOTE: the docstring is kept verbatim; it presumably doubles as the CLI
    # help text of this Typer command.
    #
    # Polls the modification times of the ``*.py`` files under ``eval_dir``
    # every ``debounce_ms`` milliseconds and re-imports all spec files whenever
    # the snapshot differs (a file was edited, added or removed). Exits with
    # status 1 when the directory does not exist; Ctrl+C ends the loop.
    import importlib.util

    project_root = os.getcwd()
    watch_dir = Path(project_root) / eval_dir

    if not watch_dir.exists():
        console.print(f"[red]Watch directory not found: {watch_dir}[/red]")
        raise typer.Exit(1)

    console.print(f"[cyan]👁️ Watching {watch_dir} (debounce: {debounce_ms}ms)[/cyan]")
    console.print("[dim]Press Ctrl+C to stop[/dim]\n")

    def _get_mtimes() -> dict[str, float]:
        """Snapshot path -> mtime for every watched file (names starting with ``_`` are skipped)."""
        mtimes: dict[str, float] = {}
        for f in watch_dir.rglob("*.py"):
            if f.name.startswith("_"):
                continue
            # A file can vanish between rglob() and stat(); just leave it out.
            with contextlib.suppress(OSError):
                mtimes[str(f)] = f.stat().st_mtime
        return mtimes

    def _run_specs() -> None:
        """Import every spec file into a fresh runtime and report how many specs it lists."""
        console.print(f"[cyan]▶ Running specs at {time.strftime('%H:%M:%S')}...[/cyan]")
        try:
            from evalgate_sdk.runtime.registry import create_eval_runtime

            handle = create_eval_runtime("watch-mode")
            try:
                for f in sorted(watch_dir.rglob("*.py")):
                    if f.name.startswith("_"):
                        continue
                    try:
                        spec = importlib.util.spec_from_file_location(f.stem, f)
                        if spec and spec.loader:
                            mod = importlib.util.module_from_spec(spec)
                            spec.loader.exec_module(mod)
                    except Exception as exc:
                        # One broken spec file must not stop the others from loading.
                        console.print(f"[red]Error loading {f.name}: {exc}[/red]")
                specs = handle.runtime.list()
                console.print(f"[green]✓ Discovered {len(specs)} spec(s)[/green]")
            finally:
                # Always release the runtime, including on errors and Ctrl+C mid-run.
                handle.dispose()
        except Exception as exc:
            console.print(f"[red]Run error: {exc}[/red]")

    try:
        # The initial run sits inside the try so Ctrl+C during it also exits cleanly.
        last_mtimes = _get_mtimes()
        _run_specs()

        while True:
            time.sleep(debounce_ms / 1000.0)
            current = _get_mtimes()
            if current != last_mtimes:
                last_mtimes = current
                if clear_screen:
                    os.system("cls" if os.name == "nt" else "clear")
                _run_specs()
    except KeyboardInterrupt:
        console.print("\n[yellow]Watch mode stopped.[/yellow]")
260
+
261
+
262
+ # ── compare ───────────────────────────────────────────────────────────
263
+
264
+
265
def compare(
    files: list[str] = FILES_ARG,
    format: str = typer.Option("human", "--format", "-f", help="Output format"),
) -> None:
    """Compare evaluation result files."""
    # NOTE: the docstring is kept verbatim; it presumably doubles as the CLI
    # help text of this Typer command.
    #
    # Compares results from multiple evaluation runs side-by-side: with
    # ``--format json`` the loaded documents are echoed as ``{"runs": [...]}``,
    # otherwise a table with one column per file shows the common summary
    # metrics. Exits with status 1 when fewer than two files are given, a file
    # is missing, or a file is not valid JSON.
    if len(files) < 2:
        console.print("[red]Need at least 2 result files to compare.[/red]")
        raise typer.Exit(1)

    runs: list[Any] = []
    for f in files:
        p = Path(f)
        if not p.exists():
            console.print(f"[red]File not found: {f}[/red]")
            raise typer.Exit(1)
        try:
            runs.append(json.loads(p.read_text(encoding="utf-8")))
        except json.JSONDecodeError as exc:
            # Report unreadable input as a CLI error instead of a traceback.
            console.print(f"[red]Invalid JSON in {f}: {exc}[/red]")
            raise typer.Exit(1) from exc

    if format == "json":
        console.print_json(json.dumps({"runs": runs}))
        return

    table = Table(title="Run Comparison")
    table.add_column("Metric", style="cyan")
    for f in files:
        table.add_column(Path(f).stem, justify="right")

    # Extract common metrics
    metrics = ["total", "passed", "failed", "pass_rate", "average_score", "total_duration_ms"]
    for metric in metrics:
        row = [metric]
        for run_data in runs:
            # A run without an object-valued "summary" simply shows "-" everywhere.
            summary = run_data.get("summary", {}) if isinstance(run_data, dict) else {}
            val = summary.get(metric, "-") if isinstance(summary, dict) else "-"
            row.append(f"{val:.2f}" if isinstance(val, float) else str(val))
        table.add_row(*row)

    console.print(table)
306
+
307
+
308
+ # ── validate ──────────────────────────────────────────────────────────
309
+
310
+
311
def validate(
    eval_dir: str = typer.Option("eval", "--eval-dir", "-e", help="Directory containing spec files"),
) -> None:
    """Validate spec files without running them."""
    # NOTE: the docstring is kept verbatim; it presumably doubles as the CLI
    # help text of this Typer command.
    #
    # Imports every ``*.py`` file under ``eval_dir`` (names starting with ``_``
    # are skipped) into a temporary runtime, collects import errors, and
    # reports how many specs the runtime lists. Exits with status 1 when the
    # directory is missing or any file fails to import.
    import importlib.util

    project_root = os.getcwd()
    eval_path = Path(project_root) / eval_dir

    if not eval_path.exists():
        console.print(f"[red]Eval directory not found: {eval_path}[/red]")
        raise typer.Exit(1)

    from evalgate_sdk.runtime.registry import create_eval_runtime

    handle = create_eval_runtime("validate")
    errors: list[str] = []
    file_count = 0

    try:
        for spec_file in sorted(eval_path.rglob("*.py")):
            if spec_file.name.startswith("_"):
                continue
            file_count += 1
            try:
                spec = importlib.util.spec_from_file_location(spec_file.stem, spec_file)
                if spec and spec.loader:
                    mod = importlib.util.module_from_spec(spec)
                    spec.loader.exec_module(mod)
            except Exception as exc:
                # Keep going so that every broken file is reported in one pass.
                errors.append(f"{spec_file.name}: {exc}")

        specs = handle.runtime.list()
    finally:
        # Release the runtime even if discovery or listing fails.
        handle.dispose()

    if errors:
        console.print(f"\n[red]✗ {len(errors)} error(s) in {file_count} file(s):[/red]")
        for err in errors:
            console.print(f" [red]• {err}[/red]")
        raise typer.Exit(1)

    console.print(f"[green]✓ {len(specs)} spec(s) validated across {file_count} file(s)[/green]")
352
+
353
+
354
+ # ── promote ───────────────────────────────────────────────────────────
355
+
356
+
357
def promote(
    candidate_file: str = typer.Argument(..., help="Path to candidate results JSON"),
    baseline_path: str = typer.Option(".evalgate/baseline.json", "--baseline", "-b"),
    min_score: float = typer.Option(90.0, "--min-score", help="Minimum score to promote"),
) -> None:
    """Promote candidate eval cases to the regression baseline."""
    # NOTE: the docstring is kept verbatim; it presumably doubles as the CLI
    # help text of this Typer command.
    #
    # Reads ``results`` from the candidate file and stores the score of every
    # named result scoring at least ``min_score`` under ``scores[<test name>]``
    # in the baseline file, which is created when missing. Exits with status 1
    # when the candidate file is missing or either file is not a JSON object.

    def _load_object(path: Path, label: str) -> dict[str, Any]:
        """Load *path* as a JSON object, or exit with status 1 and a readable message."""
        try:
            loaded = json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            console.print(f"[red]Invalid JSON in {label} file {path}: {exc}[/red]")
            raise typer.Exit(1) from exc
        if not isinstance(loaded, dict):
            console.print(f"[red]{label.capitalize()} file must contain a JSON object: {path}[/red]")
            raise typer.Exit(1)
        return loaded

    cp = Path(candidate_file)
    if not cp.exists():
        console.print(f"[red]Candidate file not found: {candidate_file}[/red]")
        raise typer.Exit(1)

    candidates = _load_object(cp, "candidate")
    results = candidates.get("results") or []

    bp = Path(baseline_path)
    baseline: dict[str, Any] = {}
    if bp.exists():
        # A corrupt baseline aborts the command rather than being overwritten.
        baseline = _load_object(bp, "baseline")

    # "scores": null in an existing baseline is treated like a missing key.
    scores = baseline.get("scores") or {}
    promoted = 0
    skipped = 0
    unnamed = 0

    for r in results:
        name = r.get("test_name", r.get("testName", ""))
        if not name:
            # Without a name the score cannot be keyed meaningfully in the baseline.
            unnamed += 1
            continue
        score = r.get("score", 0)
        if not isinstance(score, (int, float)):
            # JSON null / non-numeric scores count as 0 instead of raising TypeError.
            score = 0
        if score >= min_score:
            scores[name] = score
            promoted += 1
        else:
            skipped += 1

    baseline["scores"] = scores
    bp.parent.mkdir(parents=True, exist_ok=True)
    bp.write_text(json.dumps(baseline, indent=2), encoding="utf-8")

    console.print(f"[green]✓ Promoted {promoted} case(s) to baseline[/green]")
    if skipped:
        console.print(f"[yellow]⚠ Skipped {skipped} case(s) below min score ({min_score})[/yellow]")
    if unnamed:
        console.print(f"[yellow]⚠ Ignored {unnamed} result(s) without a test name[/yellow]")
396
+
397
+
398
+ # ── replay ────────────────────────────────────────────────────────────
399
+
400
+
401
def replay(
    result_file: str = typer.Argument(..., help="Path to previous run result JSON"),
    spec_name: str = typer.Option("", "--spec", "-s", help="Replay a specific spec by name"),
) -> None:
    """Replay a previous evaluation run or specific spec."""
    # NOTE: the docstring is kept verbatim; it presumably doubles as the CLI
    # help text of this Typer command.
    #
    # Loads ``results`` from a stored run file, optionally keeps only the
    # entries whose name equals ``spec_name``, and prints them as a table.
    # Exits with status 1 when the file is missing, is not a JSON object, or
    # contains no result with the requested name.
    rp = Path(result_file)
    if not rp.exists():
        console.print(f"[red]Result file not found: {result_file}[/red]")
        raise typer.Exit(1)

    try:
        data = json.loads(rp.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        # Report unreadable input as a CLI error instead of a traceback.
        console.print(f"[red]Invalid JSON in result file {result_file}: {exc}[/red]")
        raise typer.Exit(1) from exc
    if not isinstance(data, dict):
        console.print(f"[red]Result file must contain a JSON object: {result_file}[/red]")
        raise typer.Exit(1)

    results = data.get("results") or []

    if spec_name:
        results = [r for r in results if r.get("test_name", r.get("testName", "")) == spec_name]
        if not results:
            console.print(f"[red]No spec named '{spec_name}' found in results.[/red]")
            raise typer.Exit(1)

    def _number(value: Any) -> float:
        """Return *value* when it is numeric, else 0, so ``:.1f`` / ``:.0f`` cannot fail."""
        return value if isinstance(value, (int, float)) else 0

    table = Table(title="Replay Results")
    table.add_column("Spec", style="cyan")
    table.add_column("Score", justify="right")
    table.add_column("Status")
    table.add_column("Duration (ms)", justify="right")

    for r in results:
        # Both snake_case and camelCase keys are accepted for each field.
        name = r.get("test_name", r.get("testName", "?"))
        score = _number(r.get("score", 0))
        passed = r.get("passed", r.get("pass", False))
        dur = _number(r.get("duration_ms", r.get("durationMs", 0)))
        status = "[green]✓ passed[/green]" if passed else "[red]✗ failed[/red]"
        table.add_row(str(name), f"{score:.1f}", status, f"{dur:.0f}")

    console.print(table)
    console.print(f"\n[dim]{len(results)} result(s) replayed from {rp.name}[/dim]")
@@ -0,0 +1,103 @@
1
+ """Versioned policy packs for evalgate check --policy.
2
+
3
+ Schema: policy_id, version, thresholds, rationale, checks.
4
+ Usage: --policy HIPAA@1
5
+
6
+ Port of ``cli/policy-packs.ts``.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from typing import Any
13
+
14
+
15
@dataclass
class PolicyPack:
    """One version of a named policy pack, as selected with ``--policy ID@VERSION``."""

    # Identifier of the policy; the packs defined in this module use
    # upper-case ids such as "HIPAA".
    policy_id: str = ""
    # Version of the pack; 1 is the default.
    version: int = 1
    # Threshold settings keyed by name (free-form values).
    thresholds: dict[str, Any] = field(default_factory=dict)
    # Short explanation of why the pack exists.
    rationale: str = ""
    # Names of the checks belonging to this pack.
    checks: list[str] = field(default_factory=list)
22
+
23
+
24
# Registry of built-in policy packs: policy id -> {version number -> pack}.
POLICY_PACKS: dict[str, dict[int, PolicyPack]] = {
    "HIPAA": {
        1: PolicyPack(
            policy_id="HIPAA",
            version=1,
            thresholds={
                "required_safety_rate": 0.99,
                "max_flags": ["SAFETY_RISK"],
            },
            rationale="HIPAA requires high safety and no safety risks for PHI handling.",
            checks=["safety_rate", "no_safety_flags"],
        ),
    },
    "SOC2": {
        1: PolicyPack(
            policy_id="SOC2",
            version=1,
            thresholds={
                "required_safety_rate": 0.95,
                "max_flags": ["SAFETY_RISK", "LOW_PASS_RATE"],
            },
            rationale="SOC2 trust criteria for security and availability.",
            checks=["safety_rate", "flag_restrictions"],
        ),
    },
    "GDPR": {
        1: PolicyPack(
            policy_id="GDPR",
            version=1,
            thresholds={
                "required_safety_rate": 0.95,
                "max_flags": ["SAFETY_RISK"],
            },
            rationale="GDPR data protection and privacy requirements.",
            checks=["safety_rate", "no_safety_flags"],
        ),
    },
    "PCI_DSS": {
        1: PolicyPack(
            policy_id="PCI_DSS",
            version=1,
            thresholds={
                "required_safety_rate": 0.99,
                "max_flags": ["SAFETY_RISK", "LOW_PASS_RATE"],
            },
            rationale="PCI DSS cardholder data security standards.",
            checks=["safety_rate", "flag_restrictions"],
        ),
    },
    "FINRA_4511": {
        1: PolicyPack(
            policy_id="FINRA_4511",
            version=1,
            thresholds={
                "required_safety_rate": 0.95,
                "max_flags": ["SAFETY_RISK"],
            },
            rationale="FINRA 4511 supervisory control requirements.",
            checks=["safety_rate", "no_safety_flags"],
        ),
    },
}
71
+
72
+
73
def resolve_policy_pack(spec: str) -> PolicyPack | None:
    """Resolve a ``--policy`` value such as ``'HIPAA@1'`` or ``'HIPAA'``.

    The policy id is matched case-insensitively (it is upper-cased before the
    lookup). Without an ``@version`` suffix, version 1 is used.

    Returns:
        The matching pack, or ``None`` when the version is not an integer, is
        smaller than 1, or no pack with that id and version is registered.
    """
    name, separator, raw_version = spec.partition("@")

    version = 1
    if separator:
        # Everything after the first "@" has to parse as a positive integer.
        try:
            version = int(raw_version)
        except ValueError:
            return None
        if version < 1:
            return None

    known_versions = POLICY_PACKS.get(name.upper())
    return known_versions.get(version) if known_versions else None
95
+
96
+
97
def get_valid_policy_versions() -> list[str]:
    """Return every registered ``policy@version`` spec, sorted, for use in error messages."""
    return sorted(
        f"{policy_id}@{version}"
        for policy_id, versions in POLICY_PACKS.items()
        for version in versions
    )
@@ -0,0 +1,12 @@
1
+ """Gate profile presets.
2
+
3
+ Extracted to avoid typer dependency in config.py imports.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
# Gate profile presets, keyed by profile name. Every preset defines the same
# five settings.
PROFILES = {
    "strict": {
        "min_score": 95,
        "max_drop": 0,
        "warn_drop": 0,
        "min_n": 30,
        "allow_weak_evidence": False,
    },
    "balanced": {
        "min_score": 90,
        "max_drop": 2,
        "warn_drop": 1,
        "min_n": 10,
        "allow_weak_evidence": False,
    },
    "fast": {
        "min_score": 85,
        "max_drop": 5,
        "warn_drop": 2,
        "min_n": 5,
        "allow_weak_evidence": True,
    },
}