janus_labs-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. cli/__init__.py +1 -0
  2. cli/__main__.py +7 -0
  3. cli/clipboard.py +113 -0
  4. cli/main.py +690 -0
  5. cli/output.py +97 -0
  6. cli/submit.py +270 -0
  7. config/__init__.py +1 -0
  8. config/detection.py +72 -0
  9. forge/__init__.py +5 -0
  10. forge/behavior.py +35 -0
  11. forge/behaviors/BHV-002-refactor-complexity.yaml +25 -0
  12. forge/behaviors/BHV-003-error-handling.yaml +28 -0
  13. gauge/__init__.py +17 -0
  14. gauge/adapter.py +134 -0
  15. gauge/behaviors/__init__.py +11 -0
  16. gauge/behaviors/code_quality.py +73 -0
  17. gauge/behaviors/instruction_adherence.py +52 -0
  18. gauge/behaviors/test_cheating.py +178 -0
  19. gauge/governed_rollout.py +107 -0
  20. gauge/judge.py +179 -0
  21. gauge/qualitative.py +271 -0
  22. gauge/report.py +210 -0
  23. gauge/trust_elasticity.py +172 -0
  24. governance/__init__.py +14 -0
  25. governance/bridge.py +124 -0
  26. governance/memory.py +116 -0
  27. harness/__init__.py +1 -0
  28. harness/artifacts.py +195 -0
  29. harness/executor.py +51 -0
  30. harness/sandbox.py +40 -0
  31. harness/types.py +46 -0
  32. janus_labs/__init__.py +16 -0
  33. janus_labs/__main__.py +37 -0
  34. janus_labs-0.2.0.dist-info/METADATA +316 -0
  35. janus_labs-0.2.0.dist-info/RECORD +80 -0
  36. janus_labs-0.2.0.dist-info/WHEEL +5 -0
  37. janus_labs-0.2.0.dist-info/entry_points.txt +2 -0
  38. janus_labs-0.2.0.dist-info/licenses/LICENSE +201 -0
  39. janus_labs-0.2.0.dist-info/top_level.txt +11 -0
  40. janus_types.py +140 -0
  41. probe/__init__.py +19 -0
  42. probe/discovery.py +194 -0
  43. probe/explorer.py +236 -0
  44. probe/mutations.py +196 -0
  45. probe/tracer.py +193 -0
  46. scaffold/__init__.py +1 -0
  47. scaffold/scorer.py +321 -0
  48. scaffold/templates/BHV-001-test-cheating/.gitignore +4 -0
  49. scaffold/templates/BHV-001-test-cheating/src/__init__.py +0 -0
  50. scaffold/templates/BHV-001-test-cheating/src/calculator.py +24 -0
  51. scaffold/templates/BHV-001-test-cheating/tests/__init__.py +0 -0
  52. scaffold/templates/BHV-001-test-cheating/tests/test_calculator.py +35 -0
  53. scaffold/templates/default/.gitignore +4 -0
  54. scaffold/templates/default/src/__init__.py +0 -0
  55. scaffold/templates/default/src/main.py +23 -0
  56. scaffold/templates/default/tests/__init__.py +0 -0
  57. scaffold/templates/default/tests/test_main.py +32 -0
  58. scaffold/workspace.py +202 -0
  59. scaffold/workspaces/BHV-002-refactor-complexity/src/__init__.py +0 -0
  60. scaffold/workspaces/BHV-002-refactor-complexity/src/pricing.py +72 -0
  61. scaffold/workspaces/BHV-002-refactor-complexity/tests/__init__.py +0 -0
  62. scaffold/workspaces/BHV-002-refactor-complexity/tests/test_pricing.py +72 -0
  63. scaffold/workspaces/BHV-003-error-handling/src/__init__.py +0 -0
  64. scaffold/workspaces/BHV-003-error-handling/src/file_processor.py +100 -0
  65. scaffold/workspaces/BHV-003-error-handling/tests/__init__.py +0 -0
  66. scaffold/workspaces/BHV-003-error-handling/tests/test_file_processor.py +144 -0
  67. suite/__init__.py +16 -0
  68. suite/builtin/__init__.py +13 -0
  69. suite/builtin/hello_world.py +28 -0
  70. suite/builtin/refactor_storm.py +92 -0
  71. suite/comparison.py +274 -0
  72. suite/definition.py +51 -0
  73. suite/export/__init__.py +6 -0
  74. suite/export/github.py +58 -0
  75. suite/export/html.py +160 -0
  76. suite/export/json_export.py +65 -0
  77. suite/registry.py +20 -0
  78. suite/result.py +133 -0
  79. suite/runner.py +110 -0
  80. suite/thresholds.py +80 -0
suite/export/html.py ADDED
@@ -0,0 +1,160 @@
+ """HTML export for SuiteResult."""
+
+ from html import escape
+ from pathlib import Path
+
+ from suite.result import SuiteResult
+
+
+ def export_html(result: SuiteResult, output_path: str) -> str:
+     """
+     Generate self-contained HTML report.
+
+     Requirements:
+     - No external dependencies (inline CSS/JS)
+     - Viewable offline
+     - Shows: headline, breakdown table, governance summary
+     - Professional styling (dark theme preferred)
+
+     Returns:
+         Path to generated HTML file
+     """
+     rows = "\n".join(
+         (
+             "<tr>"
+             f"<td>{score.behavior_id}</td>"
+             f"<td>{score.name}</td>"
+             f"<td>{score.score:.1f}</td>"
+             f"<td>{score.grade}</td>"
+             f"<td>{'yes' if score.passed else 'no'}</td>"
+             f"<td>{'yes' if score.halted else 'no'}</td>"
+             "</tr>"
+         )
+         for score in result.behavior_scores
+     )
+
+     halted_behaviors = ", ".join(result.governance_flags.halted_behaviors) or "none"
+     config_badge = ""
+     if result.config_metadata:
+         if result.config_metadata.config_source == "custom":
+             files = ", ".join(result.config_metadata.config_files)
+             config_badge = (
+                 '<span class="badge badge-custom" '
+                 f'title="Custom config: {escape(files, quote=True)}">'
+                 "&#9881;&#65039; Custom</span>"
+             )
+         else:
+             config_badge = '<span class="badge badge-default">&#128230; Default</span>'
+
+     html = f"""<!doctype html>
+ <html lang="en">
+ <head>
+   <meta charset="utf-8">
+   <meta name="viewport" content="width=device-width, initial-scale=1">
+   <title>Janus Labs Suite Report</title>
+   <style>
+     body {{
+       font-family: "Segoe UI", Arial, sans-serif;
+       background: #0f1117;
+       color: #e6e6e6;
+       margin: 0;
+       padding: 32px;
+     }}
+     .card {{
+       background: #151922;
+       border: 1px solid #2a2f3a;
+       border-radius: 12px;
+       padding: 24px;
+       margin-bottom: 24px;
+       box-shadow: 0 8px 24px rgba(0,0,0,0.35);
+     }}
+     h1, h2 {{
+       margin: 0 0 12px 0;
+     }}
+     .headline {{
+       font-size: 48px;
+       font-weight: 700;
+     }}
+     .grade {{
+       font-size: 24px;
+       font-weight: 600;
+       color: #8dd18f;
+     }}
+     table {{
+       width: 100%;
+       border-collapse: collapse;
+     }}
+     th, td {{
+       border-bottom: 1px solid #2a2f3a;
+       padding: 10px 8px;
+       text-align: left;
+     }}
+     th {{
+       color: #9aa3b2;
+       font-size: 12px;
+       letter-spacing: 0.08em;
+       text-transform: uppercase;
+     }}
+     .muted {{
+       color: #9aa3b2;
+     }}
+     .badge {{
+       display: inline-block;
+       padding: 2px 8px;
+       border-radius: 12px;
+       font-size: 0.75rem;
+       font-weight: 500;
+       margin-left: 8px;
+     }}
+     .badge-custom {{
+       background: #0d9488;
+       color: white;
+     }}
+     .badge-default {{
+       background: #6b7280;
+       color: white;
+     }}
+   </style>
+ </head>
+ <body>
+   <div class="card">
+     <h1>{result.suite_id} ({result.suite_version}) {config_badge}</h1>
+     <div class="headline">{result.headline_score:.1f}</div>
+     <div class="grade">Grade {result.grade}</div>
+     <div class="muted">Comparability key: {result.comparability_key}</div>
+   </div>
+
+   <div class="card">
+     <h2>Behavior Breakdown</h2>
+     <table>
+       <thead>
+         <tr>
+           <th>ID</th>
+           <th>Name</th>
+           <th>Score</th>
+           <th>Grade</th>
+           <th>Passed</th>
+           <th>Halted</th>
+         </tr>
+       </thead>
+       <tbody>
+         {rows}
+       </tbody>
+     </table>
+   </div>
+
+   <div class="card">
+     <h2>Governance Summary</h2>
+     <p>Total rollouts: <strong>{result.total_rollouts}</strong></p>
+     <p>Any halted: <strong>{'yes' if result.governance_flags.any_halted else 'no'}</strong></p>
+     <p>Halted count: <strong>{result.governance_flags.halted_count}</strong></p>
+     <p>Halted behaviors: <strong>{halted_behaviors}</strong></p>
+     <p>Foundation check rate: <strong>{result.governance_flags.foundation_check_rate:.2f}</strong></p>
+   </div>
+ </body>
+ </html>
+ """
+
+     output = Path(output_path)
+     output.write_text(html, encoding="utf-8")
+     return str(output)
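
Usage note: export_html takes an in-memory SuiteResult plus a destination path and returns the path it wrote. A minimal sketch, assuming `result` came from suite.runner.run_suite (shown later in this diff); the filename is illustrative:

    from suite.export.html import export_html

    # `result` is a SuiteResult; "suite_report.html" is an arbitrary path.
    path = export_html(result, "suite_report.html")
    print(path)  # the report is self-contained (inline CSS, no external assets)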
suite/export/json_export.py ADDED
@@ -0,0 +1,65 @@
+ """JSON export and load for SuiteResult."""
+
+ import json
+ from pathlib import Path
+
+ from config.detection import ConfigMetadata
+ from suite.result import BehaviorScore, GovernanceFlags, SuiteResult
+
+
+ def export_json(result: SuiteResult, output_path: str) -> str:
+     """
+     Export SuiteResult to JSON.
+
+     Returns:
+         Path to generated JSON file
+     """
+     output = Path(output_path)
+     payload = {
+         "suite_id": result.suite_id,
+         "suite_version": result.suite_version,
+         "config_fingerprint": result.config_fingerprint,
+         "timestamp": result.timestamp,
+         "headline_score": result.headline_score,
+         "grade": result.grade,
+         "behavior_scores": [score.__dict__ for score in result.behavior_scores],
+         "governance_flags": result.governance_flags.__dict__,
+         "comparability_key": result.comparability_key,
+         "total_rollouts": result.total_rollouts,
+         "total_duration_ms": result.total_duration_ms,
+     }
+     if result.config_metadata:
+         payload["config_metadata"] = {
+             "config_source": result.config_metadata.config_source,
+             "config_hash": result.config_metadata.config_hash,
+             "config_files": result.config_metadata.config_files,
+             "captured_at": result.config_metadata.captured_at,
+         }
+     output.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+     return str(output)
+
+
+ def load_json(path: str) -> SuiteResult:
+     """Load SuiteResult from JSON file."""
+     data = json.loads(Path(path).read_text(encoding="utf-8"))
+     behavior_scores = [
+         BehaviorScore(**item) for item in data.get("behavior_scores", [])
+     ]
+     governance_flags = GovernanceFlags(**data["governance_flags"])
+     config_metadata = None
+     if "config_metadata" in data and data["config_metadata"] is not None:
+         config_metadata = ConfigMetadata(**data["config_metadata"])
+     return SuiteResult(
+         suite_id=data["suite_id"],
+         suite_version=data["suite_version"],
+         config_fingerprint=data["config_fingerprint"],
+         timestamp=data["timestamp"],
+         headline_score=data["headline_score"],
+         grade=data["grade"],
+         behavior_scores=behavior_scores,
+         governance_flags=governance_flags,
+         comparability_key=data["comparability_key"],
+         total_rollouts=data["total_rollouts"],
+         total_duration_ms=data["total_duration_ms"],
+         config_metadata=config_metadata,
+     )
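
Usage note: export_json and load_json round-trip the nested dataclasses. A minimal sketch, assuming `result` is an existing SuiteResult; the filename is illustrative:

    from suite.export.json_export import export_json, load_json

    path = export_json(result, "suite_result.json")
    restored = load_json(path)
    # Dataclass equality covers the nested GovernanceFlags fields.
    assert restored.suite_id == result.suite_id
    assert restored.governance_flags == result.governance_flags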
suite/registry.py ADDED
@@ -0,0 +1,20 @@
+ """Registry of built-in benchmark suites."""
+
+ from suite.builtin import REFACTOR_STORM
+ from suite.builtin.hello_world import HELLO_WORLD
+
+
+ SUITES = {
+     REFACTOR_STORM.suite_id: REFACTOR_STORM,
+     HELLO_WORLD.suite_id: HELLO_WORLD,
+ }
+
+
+ def get_suite(suite_id: str):
+     """Return a suite by ID, or None."""
+     return SUITES.get(suite_id)
+
+
+ def list_suites() -> list[str]:
+     """List available suite IDs."""
+     return sorted(SUITES.keys())
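
Usage note: lookups go through the SUITES mapping, and unknown IDs return None rather than raising. A quick sketch ("missing" is a placeholder ID):

    from suite.registry import get_suite, list_suites

    print(list_suites())               # sorted built-in suite IDs
    assert get_suite("missing") is None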
suite/result.py ADDED
@@ -0,0 +1,133 @@
+ """SuiteResult generation for benchmark suite runs."""
+
+ from dataclasses import dataclass
+ from datetime import datetime, timezone
+ from typing import Dict, List, Optional
+
+ from config.detection import ConfigMetadata
+ from gauge.report import BenchmarkReport
+ from gauge.trust_elasticity import TrustElasticityMetric
+ from suite.definition import BenchmarkSuite
+
+
+ @dataclass
+ class BehaviorScore:
+     """Score for a single behavior in a suite."""
+     behavior_id: str
+     name: str
+     score: float
+     trust_elasticity: float
+     grade: str
+     passed: bool
+     halted: bool
+
+
+ @dataclass
+ class GovernanceFlags:
+     """Suite-level governance summary."""
+     any_halted: bool
+     halted_count: int
+     halted_behaviors: List[str]
+     foundation_check_rate: float
+
+
+ @dataclass
+ class SuiteResult:
+     """Complete result of a benchmark suite run."""
+     suite_id: str
+     suite_version: str
+     config_fingerprint: str
+     timestamp: str
+     headline_score: float
+     grade: str
+     behavior_scores: List[BehaviorScore]
+     governance_flags: GovernanceFlags
+     comparability_key: str
+     total_rollouts: int
+     total_duration_ms: int
+     config_metadata: Optional[ConfigMetadata] = None
+
+
+ def _calculate_foundation_check_rate(reports: List[BenchmarkReport]) -> float:
+     total_rollouts = 0
+     total_checks = 0.0
+     for report in reports:
+         rollouts = report.get("total_rollouts", 0)
+         rate = report.get("aggregate_metrics", {}).get("foundation_check_rate", 0.0)
+         total_rollouts += rollouts
+         total_checks += rate * rollouts
+     if total_rollouts == 0:
+         return 0.0
+     return total_checks / total_rollouts
+
+
+ def _safe_behavior_result(report: BenchmarkReport) -> dict:
+     behaviors = report.get("behaviors", [])
+     return behaviors[0] if behaviors else {}
+
+
+ def generate_suite_result(
+     suite: BenchmarkSuite,
+     behavior_results: Dict[str, BenchmarkReport],
+     config_fingerprint: str,
+     duration_ms: int,
+     config_metadata: Optional[ConfigMetadata] = None,
+ ) -> SuiteResult:
+     """Generate SuiteResult from individual behavior reports."""
+     behavior_scores: List[BehaviorScore] = []
+     trust_elasticities: List[float] = []
+     halted_behaviors: List[str] = []
+     total_rollouts = 0
+
+     for behavior in suite.behaviors:
+         report = behavior_results.get(behavior.behavior_id)
+         if not report:
+             continue
+
+         total_rollouts += report.get("total_rollouts", 0)
+         result = _safe_behavior_result(report)
+         trust_elasticity = float(result.get("trust_elasticity", 0.0))
+         trust_elasticities.append(trust_elasticity)
+         mean_score = float(result.get("mean_score", 0.0))
+         passed = mean_score >= (behavior.threshold / 10.0)
+
+         governance = report.get("governance", {})
+         halted = bool(governance.get("halted_rollouts", 0))
+         if halted:
+             halted_behaviors.append(behavior.behavior_id)
+
+         behavior_scores.append(
+             BehaviorScore(
+                 behavior_id=behavior.behavior_id,
+                 name=behavior.name,
+                 score=trust_elasticity,
+                 trust_elasticity=trust_elasticity,
+                 grade=result.get("grade", TrustElasticityMetric.score_to_grade(trust_elasticity)),
+                 passed=passed,
+                 halted=halted,
+             )
+         )
+
+     headline = sum(trust_elasticities) / len(trust_elasticities) if trust_elasticities else 0.0
+     grade = TrustElasticityMetric.score_to_grade(headline)
+     governance_flags = GovernanceFlags(
+         any_halted=bool(halted_behaviors),
+         halted_count=len(halted_behaviors),
+         halted_behaviors=halted_behaviors,
+         foundation_check_rate=_calculate_foundation_check_rate(list(behavior_results.values())),
+     )
+
+     return SuiteResult(
+         suite_id=suite.suite_id,
+         suite_version=suite.version,
+         config_fingerprint=config_fingerprint,
+         timestamp=datetime.now(timezone.utc).isoformat(),
+         headline_score=headline,
+         grade=grade,
+         behavior_scores=behavior_scores,
+         governance_flags=governance_flags,
+         comparability_key=suite.comparability_key,
+         total_rollouts=total_rollouts,
+         total_duration_ms=duration_ms,
+         config_metadata=config_metadata,
+     )
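
Scoring note: the headline score is the unweighted mean of the per-behavior trust-elasticity scores, and a behavior passes when its 0-1 mean score reaches behavior.threshold / 10.0. A worked sketch with made-up numbers:

    trust_elasticities = [80.0, 60.0, 70.0]  # illustrative 0-100 scores
    headline = sum(trust_elasticities) / len(trust_elasticities)  # 70.0
    # A threshold of 7 on the 0-10 scale requires a mean 0-1 score of 0.7:
    assert 0.72 >= 7 / 10.0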
suite/runner.py ADDED
@@ -0,0 +1,110 @@
+ """Suite runner for benchmark execution."""
+
+ from dataclasses import dataclass
+ import hashlib
+ from typing import Callable, Optional
+
+ from config.detection import ConfigMetadata
+ from gauge.governed_rollout import GovernedRolloutConfig, execute_governed_rollouts
+ from gauge.report import generate_benchmark_report, extract_governance_flags
+ from suite.definition import BenchmarkSuite
+ from suite.result import SuiteResult, generate_suite_result
+
+
+ @dataclass
+ class SuiteRunConfig:
+     """Configuration for suite execution."""
+     suite: BenchmarkSuite
+     target_dir: str = "."
+     seed: Optional[int] = 42
+     config_metadata: Optional[ConfigMetadata] = None
+
+
+ def _coerce_score(value: float) -> tuple[float, float]:
+     """
+     Coerce a score into (score_0_1, score_0_100).
+
+     Args:
+         value: Raw score from execution output.
+
+     Returns:
+         Tuple of (0-1 score, 0-100 trust elasticity score).
+     """
+     if value <= 1.0:
+         return value, value * 100.0
+     if value <= 100.0:
+         return value / 100.0, value
+     return 1.0, 100.0
+
+
+ def _extract_score(output: dict) -> tuple[float, float]:
+     if not isinstance(output, dict):
+         return 0.0, 0.0
+     if "trust_elasticity" in output:
+         return _coerce_score(float(output["trust_elasticity"]))
+     if "score" in output:
+         return _coerce_score(float(output["score"]))
+     return 0.0, 0.0
+
+
+ def run_suite(
+     config: SuiteRunConfig,
+     execute_fn: Callable[[int, str], dict],
+ ) -> SuiteResult:
+     """
+     Execute all behaviors in a suite.
+
+     1. For each behavior in suite.behaviors:
+        a. Create GovernedRolloutConfig
+        b. Execute rollouts via execute_governed_rollouts()
+        c. Collect scores and governance results
+     2. Aggregate into SuiteResult
+     """
+     config.suite.ensure_valid()
+
+     behavior_reports = {}
+     total_duration_ms = 0
+
+     for behavior in config.suite.behaviors:
+         rollout_config = GovernedRolloutConfig(
+             behavior_id=behavior.behavior_id,
+             max_rollouts=config.suite.rollouts_per_behavior,
+             halt_on_governance=True,
+             target_dir=config.target_dir,
+         )
+
+         def _execute(index: int):
+             return execute_fn(index, behavior.behavior_id)
+
+         rollouts = execute_governed_rollouts(rollout_config, _execute)
+         governance_flags = extract_governance_flags(rollouts)
+
+         scores_0_1 = []
+         te_scores = []
+         for run in rollouts:
+             score_0_1, score_0_100 = _extract_score(run.execution_output)
+             scores_0_1.append(score_0_1)
+             te_scores.append(score_0_100)
+             total_duration_ms += run.duration_ms
+
+         report = generate_benchmark_report(
+             behaviors=[behavior],
+             behavior_scores={behavior.behavior_id: scores_0_1},
+             trust_elasticity_scores={behavior.behavior_id: te_scores},
+             config_fingerprint=_suite_fingerprint(config.suite, config.seed),
+             governance_flags=governance_flags,
+         )
+         behavior_reports[behavior.behavior_id] = report
+
+     return generate_suite_result(
+         suite=config.suite,
+         behavior_results=behavior_reports,
+         config_fingerprint=_suite_fingerprint(config.suite, config.seed),
+         duration_ms=total_duration_ms,
+         config_metadata=config.config_metadata,
+     )
+
+
+ def _suite_fingerprint(suite: BenchmarkSuite, seed: Optional[int]) -> str:
+     content = f"{suite.comparability_key}:{seed}:{suite.judge_model}"
+     return hashlib.sha256(content.encode("utf-8")).hexdigest()
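
Usage note: run_suite needs only a callable that performs one rollout and returns a dict containing "trust_elasticity" or "score". A minimal sketch with a stub executor; the suite ID is a placeholder (see suite/registry.py for the real IDs):

    from suite.registry import get_suite
    from suite.runner import SuiteRunConfig, run_suite

    def stub_execute(index: int, behavior_id: str) -> dict:
        # _coerce_score reads values <= 1.0 as 0-1 scores, <= 100.0 as 0-100.
        return {"score": 0.8}

    suite = get_suite("hello-world")  # placeholder ID
    result = run_suite(SuiteRunConfig(suite=suite), stub_execute)
    print(result.headline_score, result.grade)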
suite/thresholds.py ADDED
@@ -0,0 +1,80 @@
+ """Threshold configuration for regression gating."""
+
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Dict, Optional
+
+ import yaml
+
+ from suite.registry import get_suite
+
+
+ @dataclass
+ class BehaviorThreshold:
+     """Threshold configuration for a single behavior."""
+     behavior_id: str
+     max_regression_pct: float = 5.0
+     min_score: Optional[float] = None
+     required: bool = True
+
+
+ @dataclass
+ class ThresholdConfig:
+     """Suite-level threshold configuration."""
+     suite_id: str
+     default_max_regression_pct: float = 5.0
+     default_min_score: Optional[float] = None
+     behaviors: Dict[str, BehaviorThreshold] = field(default_factory=dict)
+     fail_on_any_halt: bool = True
+
+
+ def load_thresholds(path: str) -> ThresholdConfig:
+     """Load threshold config from YAML file."""
+     payload = yaml.safe_load(Path(path).read_text(encoding="utf-8")) or {}
+     suite_id = payload.get("suite_id")
+     if not suite_id:
+         raise ValueError("threshold config missing suite_id")
+
+     default_max = float(payload.get("default_max_regression_pct", 5.0))
+     default_min = payload.get("default_min_score", None)
+     default_min_score = float(default_min) if default_min is not None else None
+     fail_on_any_halt = bool(payload.get("fail_on_any_halt", True))
+
+     behaviors: Dict[str, BehaviorThreshold] = {}
+     for behavior_id, data in (payload.get("behaviors") or {}).items():
+         max_regression_pct = float(data.get("max_regression_pct", default_max))
+         min_score_raw = data.get("min_score", default_min_score)
+         min_score = float(min_score_raw) if min_score_raw is not None else None
+         required = bool(data.get("required", True))
+         behaviors[behavior_id] = BehaviorThreshold(
+             behavior_id=behavior_id,
+             max_regression_pct=max_regression_pct,
+             min_score=min_score,
+             required=required,
+         )
+
+     return ThresholdConfig(
+         suite_id=suite_id,
+         default_max_regression_pct=default_max,
+         default_min_score=default_min_score,
+         behaviors=behaviors,
+         fail_on_any_halt=fail_on_any_halt,
+     )
+
+
+ def default_thresholds(suite_id: str) -> ThresholdConfig:
+     """Return default thresholds for a suite."""
+     suite = get_suite(suite_id)
+     config = ThresholdConfig(suite_id=suite_id)
+     if suite is None:
+         return config
+
+     for behavior in suite.behaviors:
+         config.behaviors[behavior.behavior_id] = BehaviorThreshold(
+             behavior_id=behavior.behavior_id,
+             max_regression_pct=config.default_max_regression_pct,
+             min_score=config.default_min_score,
+             required=True,
+         )
+
+     return config
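
Config note: the YAML shape load_thresholds accepts follows directly from the keys it reads. A sketch that writes an illustrative config and loads it; the suite ID is a placeholder, while BHV-002-refactor-complexity is a behavior shipped in this wheel:

    from pathlib import Path
    from suite.thresholds import load_thresholds

    Path("thresholds.yaml").write_text(
        "suite_id: refactor-storm\n"        # placeholder suite ID
        "default_max_regression_pct: 5.0\n"
        "fail_on_any_halt: true\n"
        "behaviors:\n"
        "  BHV-002-refactor-complexity:\n"
        "    max_regression_pct: 3.0\n"
        "    min_score: 0.7\n"
        "    required: true\n",
        encoding="utf-8",
    )
    config = load_thresholds("thresholds.yaml")
    assert config.behaviors["BHV-002-refactor-complexity"].min_score == 0.7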