mcp-plesk-dev-docs 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,330 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import re
5
+ from dataclasses import dataclass
6
+ from typing import Any
7
+
8
# A token is a run of ASCII letters, digits and the punctuation that commonly
# appears inside paths, versions and identifiers (_ . / + -).
_WORD_RE = re.compile(r"[A-Za-z0-9_./+-]+")

# Low-signal words that are removed from a query before scoring.
_STOPWORDS = {
    # articles, conjunctions, copulas
    "a", "an", "and", "are", "is", "or", "the",
    # prepositions
    "for", "from", "in", "of", "on", "to", "via", "with",
    # question words
    "how", "what", "which", "when", "where", "who", "why",
}
33
+
34
+
35
@dataclass(frozen=True)
class SearchResult:
    """Immutable search hit made of a text, a title, a score and metadata."""

    # Field order is part of the positional constructor signature.
    text: str
    title: str
    score: float
    metadata: dict[str, Any]  # free-form mapping with string keys
41
+
42
+
43
@dataclass(frozen=True)
class RoutingDecision:
    """Immutable routing outcome: engine name, optional pilot config, reason tag."""

    engine: str
    # None when no pilot config applies (e.g. the baseline engine was chosen).
    pilot_config: StructurePilotConfig | None
    reason: str
48
+
49
+
50
@dataclass(frozen=True)
class StructurePilotConfig:
    """Immutable, named set of weights used by the structure-aware pilot scorer."""

    name: str

    # Per-field weights applied to query-term matches.
    title_weight: float = 2.5
    breadcrumb_weight: float = 1.8
    filename_weight: float = 0.9
    text_weight: float = 0.35

    # Bonuses granted when the query phrase occurs in the field.
    title_phrase_bonus: float = 1.5
    breadcrumb_phrase_bonus: float = 1.0

    # Multiplier for the bonus derived from the result's original rank.
    rank_weight: float = 0.5
60
+
61
+
62
# Stock weights; also the config returned when a lookup by name finds nothing.
DEFAULT_PILOT_CONFIG = StructurePilotConfig(name="base")

# All selectable configs: the default first, then each tuned variant in order.
PILOT_CONFIGS: list[StructurePilotConfig] = [
    DEFAULT_PILOT_CONFIG,
    *(
        StructurePilotConfig(name=variant, **weights)
        for variant, weights in (
            (
                "title-focused",
                {
                    "title_weight": 3.0,
                    "breadcrumb_weight": 1.5,
                    "filename_weight": 0.8,
                    "text_weight": 0.25,
                    "title_phrase_bonus": 2.0,
                    "breadcrumb_phrase_bonus": 0.75,
                    "rank_weight": 0.35,
                },
            ),
            (
                "breadcrumb-focused",
                {
                    "title_weight": 2.2,
                    "breadcrumb_weight": 2.4,
                    "filename_weight": 1.0,
                    "text_weight": 0.2,
                    "title_phrase_bonus": 1.25,
                    "breadcrumb_phrase_bonus": 1.5,
                    "rank_weight": 0.4,
                },
            ),
            (
                "metadata-heavy",
                {
                    "title_weight": 2.8,
                    "breadcrumb_weight": 2.2,
                    "filename_weight": 1.2,
                    "text_weight": 0.15,
                    "title_phrase_bonus": 1.75,
                    "breadcrumb_phrase_bonus": 1.25,
                    "rank_weight": 0.3,
                },
            ),
        )
    ),
]
97
+
98
+
99
def tokenize(text: str) -> list[str]:
    """Split *text* into lower-cased tokens, dropping stopwords and 1-char tokens.

    A falsy *text* (``None`` or ``""``) yields an empty list.
    """
    kept: list[str] = []
    for raw in _WORD_RE.findall(text or ""):
        word = raw.lower()
        if len(word) > 1 and word not in _STOPWORDS:
            kept.append(word)
    return kept
102
+
103
+
104
def bucket_query(query: str) -> str:
    """Classify a query for reporting/routing.

    The lower-cased query is scanned for marker substrings. Families are
    tried in the order multi-hop, structural, lookup, and the first family
    with any marker present wins. Returns ``"multi-hop"``, ``"structural"``,
    ``"lookup"``, or ``"mixed"`` when no marker matches (including for an
    empty or ``None`` query).
    """
    haystack = (query or "").lower()

    # Order matters: earlier families take precedence over later ones.
    marker_families = (
        (
            "multi-hop",
            # N-gram markers for compound/multi-hop queries
            (
                " and ",
                " then ",
                " also ",
                "together",
                "combined",
                " followed by ",
                " as well as ",
            ),
        ),
        (
            "structural",
            (
                "how to",
                "add ",
                "create ",
                "configure ",
                "register ",
                "package ",
                "restart ",
                "install ",
                "set up",
            ),
        ),
        (
            "lookup",
            (
                "list ",
                "show ",
                "find ",
                "what is",
                "where is",
                "retrieve ",
                "get ",
                "authenticate ",
            ),
        ),
    )

    for label, markers in marker_families:
        for marker in markers:
            if marker in haystack:
                return label
    return "mixed"
148
+
149
+
150
def _field_score(
    query_terms: list[str], value: str | None, weight: float, phrase_bonus: float
) -> float:
    """Score one result field against the query terms.

    Returns ``0.0`` when *value* is empty or no term occurs in it as a
    substring (case-insensitive on the field side). Otherwise returns
    ``weight * (1 + log1p(matches) + bonus)``, where *bonus* is
    *phrase_bonus* if the first two query terms, joined by a space, occur in
    the field, and ``0.0`` otherwise.
    """
    if not value:
        return 0.0

    haystack = value.lower()
    hit_count = len([term for term in query_terms if term in haystack])
    if not hit_count:
        return 0.0

    bonus = 0.0
    if len(query_terms) >= 2 and " ".join(query_terms[:2]) in haystack:
        bonus = phrase_bonus
    return weight * (1.0 + math.log1p(hit_count) + bonus)
167
+
168
+
169
def structure_pilot_score(
    query: str,
    result: dict[str, Any],
    rank: int,
    total: int,
    config: StructurePilotConfig = DEFAULT_PILOT_CONFIG,
) -> float:
    """Compute a PageIndex-inspired score from title and breadcrumb structure.

    The score sums the weighted field scores for ``title``, ``breadcrumb``,
    ``filename`` and ``text``, plus a rank bonus that falls linearly from 1
    (rank 1) to 0 (rank == total) scaled by ``config.rank_weight``. It also
    adds the title / breadcrumb phrase bonus when the whole tokenized query
    occurs in that field. Returns ``0.0`` when the query has no usable tokens.
    """
    terms = tokenize(query)
    if not terms:
        return 0.0

    title = result.get("title", "")
    breadcrumb = result.get("breadcrumb", "")

    # (field value, weight, phrase bonus), accumulated in this order.
    weighted_fields = (
        (title, config.title_weight, config.title_phrase_bonus),
        (breadcrumb, config.breadcrumb_weight, config.breadcrumb_phrase_bonus),
        (result.get("filename", ""), config.filename_weight, 0.0),
        (result.get("text", ""), config.text_weight, 0.0),
    )
    total_score = 0.0
    for field_value, weight, bonus in weighted_fields:
        total_score += _field_score(terms, field_value, weight, bonus)

    # With a single result there is no meaningful rank ordering to reward.
    rank_fraction = (total - rank) / (total - 1) if total > 1 else 0.0
    total_score += rank_fraction * config.rank_weight

    full_phrase = " ".join(terms)
    if full_phrase:
        if full_phrase in (title or "").lower():
            total_score += config.title_phrase_bonus
        if full_phrase in (breadcrumb or "").lower():
            total_score += config.breadcrumb_phrase_bonus

    return total_score
211
+
212
+
213
def rerank_with_structure(
    query: str,
    results: list[dict[str, Any]],
    config: StructurePilotConfig = DEFAULT_PILOT_CONFIG,
) -> list[dict[str, Any]]:
    """Return results sorted by a structure-aware pilot score.

    Each input dict is shallow-copied and annotated with ``_pilot_bucket``,
    ``_pilot_config`` and ``_pilot_score``; the inputs are not modified. The
    copies are returned in descending order of pilot score, with ties broken
    by the first available of ``_score_tq``, ``_distance``, ``_score``
    (``0.0`` when none is present), also descending.
    """
    total = len(results)

    # Query-level values are the same for every result: compute them once
    # instead of once per result.
    bucket = bucket_query(query)
    config_name = config.name

    scored = []
    for rank, result in enumerate(results, start=1):
        enriched = dict(result)
        enriched["_pilot_bucket"] = bucket
        enriched["_pilot_config"] = config_name
        enriched["_pilot_score"] = structure_pilot_score(
            query, enriched, rank, total, config=config
        )
        scored.append(enriched)

    # NOTE(review): the tie-breaker sorts `_distance` descending as well;
    # confirm that a larger `_distance` really means a better match here.
    scored.sort(
        key=lambda item: (
            item.get("_pilot_score", 0.0),
            item.get("_score_tq", item.get("_distance", item.get("_score", 0.0))),
        ),
        reverse=True,
    )
    return scored
238
+
239
+
240
def get_pilot_configs() -> list[StructurePilotConfig]:
    """Return a new list holding every registered pilot config, in order."""
    return [*PILOT_CONFIGS]
242
+
243
+
244
def get_pilot_config_by_name(name: str) -> StructurePilotConfig:
    """Look up a pilot config by name, falling back to the default config."""
    registry: dict[str, StructurePilotConfig] = {}
    for candidate in PILOT_CONFIGS:
        registry[candidate.name] = candidate
    if name in registry:
        return registry[name]
    return DEFAULT_PILOT_CONFIG
247
+
248
+
249
def list_routing_policies() -> dict[str, str]:
    """Return the supported routing policy names mapped to a description."""
    adaptive_description = (
        "Route multi-hop and targeted structural intents to pageindex-pilot; "
        "keep lookup and generic intents on baseline."
    )
    policies: dict[str, str] = {}
    policies["baseline-only"] = "Always use baseline retrieval for every query."
    policies["adaptive"] = adaptive_description
    policies["aggressive"] = (
        "Always use pageindex-pilot with breadcrumb-focused config."
    )
    return policies
258
+
259
+
260
def route_query(
    query: str,
    bucket: str,
    routing_policy: str = "baseline-only",
) -> RoutingDecision:
    """Select retrieval engine per-query based on policy and intent markers.

    ``baseline-only`` (also used for a falsy policy) and any unknown policy
    route to the baseline engine; ``aggressive`` always routes to
    pageindex-pilot with the breadcrumb-focused config. ``adaptive`` routes
    multi-hop queries to pageindex-pilot (breadcrumb-focused), structural
    queries that also contain a structural marker to pageindex-pilot (base
    config), and everything else to baseline. A falsy *bucket* is derived
    from the query with ``bucket_query``.
    """
    policy = (routing_policy or "baseline-only").strip().lower()
    intent = (bucket or bucket_query(query)).strip().lower()
    text = (query or "").lower()

    def _baseline(reason: str) -> RoutingDecision:
        return RoutingDecision(engine="baseline", pilot_config=None, reason=reason)

    def _pilot(config_name: str, reason: str) -> RoutingDecision:
        return RoutingDecision(
            engine="pageindex-pilot",
            pilot_config=get_pilot_config_by_name(config_name),
            reason=reason,
        )

    if policy == "baseline-only":
        return _baseline("policy-baseline-only")
    if policy == "aggressive":
        return _pilot("breadcrumb-focused", "policy-aggressive")
    if policy != "adaptive":
        return _baseline("policy-unknown-fallback")

    # Adaptive policy: decide from the bucket plus substring markers.
    multi_hop_markers = (" and ", " then ", " also ", "together", "combined")
    structural_markers = (
        "how to",
        "add ",
        "create ",
        "where is",
        "which section",
        "which page",
        "how do i",
    )

    if intent == "multi-hop" or any(m in text for m in multi_hop_markers):
        return _pilot("breadcrumb-focused", "adaptive-multi-hop")

    if intent == "structural" and any(m in text for m in structural_markers):
        return _pilot("base", "adaptive-structural")

    return _baseline("adaptive-baseline")
@@ -0,0 +1,254 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+ from statistics import mean
7
+ from typing import Any
8
+
9
# Default quality-gate thresholds.
DEFAULT_GATE_CONFIG: dict[str, Any] = {
    # Limits relative to the baseline run with the same identity.
    "regression": {
        "hit_rate": {"max_drop": 0.01},
        "mrr": {"max_drop": 0.02},
        "avg_latency_s": {"max_increase_ratio": 0.20},
    },
    # Floors that apply regardless of the baseline values.
    "absolute_minimums": {"context_recall": 0.85, "faithfulness": 0.90},
    # Metrics whose absence from a current run is reported as a failure.
    "required_metrics": [],
}

# Metrics that are averaged when repeated runs are aggregated.
_NUMERIC_METRICS = (
    "hit_rate",
    "mrr",
    "avg_latency_s",
    "faithfulness",
    "context_recall",
    "context_precision",
)
30
+
31
+
32
def _identity(run: dict[str, Any]) -> tuple[str, str, str, str]:
    """Return the (suite, profile, engine, routing_policy) key of a run.

    Missing fields fall back to "control", "unknown", "baseline" and
    "baseline-only" respectively; every component is converted with ``str``.
    """
    suite = str(run.get("suite", "control"))
    profile = str(run.get("profile", "unknown"))
    engine = str(run.get("engine", "baseline"))
    routing_policy = str(run.get("routing_policy", "baseline-only"))
    return suite, profile, engine, routing_policy
39
+
40
+
41
def _identity_string(run: dict[str, Any]) -> str:
    """Render a run's identity as ``suite=..., profile=..., engine=..., routing_policy=...``."""
    labels = ("suite", "profile", "engine", "routing_policy")
    return ", ".join(
        f"{label}={value}" for label, value in zip(labels, _identity(run))
    )
46
+
47
+
48
def aggregate_runs(runs: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Aggregate repeated runs by identity, averaging numeric quality metrics.

    Runs that share the same identity are collapsed into one summary dict
    with the identity fields, ``n_runs`` and, for each metric in
    ``_NUMERIC_METRICS`` that has at least one int/float value in the group,
    the mean of those values as a float. Summaries are returned sorted by
    identity.
    """
    buckets: dict[tuple[str, str, str, str], list[dict[str, Any]]] = {}
    for run in runs:
        ident = _identity(run)
        if ident not in buckets:
            buckets[ident] = []
        buckets[ident].append(run)

    summaries: list[dict[str, Any]] = []
    for (suite, profile, engine, routing_policy), members in buckets.items():
        summary: dict[str, Any] = {
            "suite": suite,
            "profile": profile,
            "engine": engine,
            "routing_policy": routing_policy,
            "n_runs": len(members),
        }
        for metric in _NUMERIC_METRICS:
            samples = []
            for member in members:
                sample = member.get(metric)
                # Non-numeric or missing values are left out of the average.
                if isinstance(sample, (int, float)):
                    samples.append(sample)
            if samples:
                summary[metric] = float(mean(samples))
        summaries.append(summary)

    return sorted(summaries, key=_identity)
75
+
76
+
77
def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict with *override* recursively merged over *base*.

    When both sides hold a dict under the same key the two are merged
    recursively; otherwise the value from *override* replaces the one from
    *base*. Neither input is modified, and the result shares no mutable
    containers with either input, so callers may mutate it freely.
    """
    import copy  # local import: keeps the fix self-contained in this helper

    # A shallow copy would leave nested dicts/lists aliased with ``base``.
    merged = copy.deepcopy(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _deep_merge(merged[key], value)
        else:
            merged[key] = copy.deepcopy(value)
    return merged
85
+
86
+
87
def load_gate_config(path: str | None) -> dict[str, Any]:
    """Load the quality-gate config, layering an optional JSON file over the defaults.

    With a falsy *path* an independent copy of ``DEFAULT_GATE_CONFIG`` is
    returned. Otherwise the UTF-8 JSON file at *path* is parsed and
    deep-merged over the defaults.

    Raises:
        ValueError: if the JSON document is not an object.
    """
    import copy  # local import: the module does not import ``copy`` itself

    # Deep copy so that callers mutating the returned config (including its
    # nested dicts and lists) can never alter the module-level defaults.
    config = copy.deepcopy(DEFAULT_GATE_CONFIG)
    if not path:
        return config

    override = json.loads(Path(path).read_text(encoding="utf-8"))
    if not isinstance(override, dict):
        raise ValueError("Gate config must be a JSON object.")
    return _deep_merge(config, override)
96
+
97
+
98
def build_baseline_payload(runs: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the baseline document: version 1, a UTC ISO timestamp and the aggregated runs."""
    payload: dict[str, Any] = {"version": 1}
    payload["created_at"] = datetime.now(timezone.utc).isoformat()
    payload["runs"] = aggregate_runs(runs)
    return payload
104
+
105
+
106
def write_baseline(path: str, runs: list[dict[str, Any]]) -> dict[str, Any]:
    """Write the baseline payload for *runs* to *path* as indented JSON.

    Missing parent directories are created. Returns the payload that was
    written.
    """
    payload = build_baseline_payload(runs)
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return payload
112
+
113
+
114
def load_baseline(path: str) -> list[dict[str, Any]]:
    """Read a baseline JSON file and return its runs, aggregated by identity.

    The file may hold either a bare list of runs or an object whose ``runs``
    entry is such a list.

    Raises:
        ValueError: if the document has neither of those shapes.
    """
    payload = json.loads(Path(path).read_text(encoding="utf-8"))

    if isinstance(payload, list):
        runs = payload
    elif isinstance(payload, dict):
        runs = payload.get("runs")
    else:
        runs = None

    if not isinstance(runs, list):
        raise ValueError("Baseline file must be a JSON list or object with 'runs'.")
    return aggregate_runs(runs)
121
+
122
+
123
def _check_required_metrics(run, run_name, metrics, failures):
    """Append a failure message for every required metric absent from *run*."""
    failures.extend(
        f"Missing required metric '{metric}' in current run: {run_name}"
        for metric in metrics
        if metric not in run
    )
130
+
131
+
132
def _check_regression_gates(run, baseline, run_name, config, failures, warnings):
    """Compare each configured metric of *run* with *baseline*.

    A metric missing on either side only produces a warning. Otherwise a
    failure is recorded when the drop (baseline - current) exceeds
    ``max_drop``, or when the relative increase exceeds
    ``max_increase_ratio``; the ratio check is skipped unless the baseline
    value is positive.
    """
    for metric, limits in config.items():
        if metric not in run:
            warnings.append(f"Metric '{metric}' missing in current run: {run_name}")
            continue
        if metric not in baseline:
            warnings.append(f"Metric '{metric}' missing in baseline run: {run_name}")
            continue

        current_value = float(run[metric])
        baseline_value = float(baseline[metric])

        if "max_drop" in limits:
            observed_drop = baseline_value - current_value
            allowed_drop = float(limits["max_drop"])
            if observed_drop > allowed_drop:
                failures.append(
                    f"Regression gate failed for {metric} ({run_name}): "
                    f"drop={observed_drop:.4f}, allowed={allowed_drop:.4f}"
                )

        # A non-positive baseline has no meaningful relative increase.
        if "max_increase_ratio" in limits and baseline_value > 0:
            observed_ratio = (current_value - baseline_value) / baseline_value
            allowed_ratio = float(limits["max_increase_ratio"])
            if observed_ratio > allowed_ratio:
                failures.append(
                    f"Regression gate failed for {metric} ({run_name}): "
                    f"increase_ratio={observed_ratio:.4f}, "
                    f"allowed={allowed_ratio:.4f}"
                )
160
+
161
+
162
def _check_absolute_minimums(run, run_name, config, failures, warnings):
    """Record a failure for every metric of *run* below its configured floor.

    Metrics that are absent from *run* only produce a warning.
    """
    for metric, threshold in config.items():
        if metric in run:
            observed = float(run[metric])
            floor = float(threshold)
            if observed < floor:
                failures.append(
                    f"Absolute gate failed for {metric} ({run_name}): "
                    f"value={observed:.4f}, threshold={floor:.4f}"
                )
        else:
            warnings.append(
                f"Absolute gate metric '{metric}' missing in current run: {run_name}"
            )
175
+
176
+
177
def _check_metrics(
    run: dict[str, Any],
    baseline: dict[str, Any],
    run_name: str,
    gate_config: dict[str, Any],
    failures: list[str],
    warnings: list[str],
):
    """Run every gate family for one run, appending to *failures* / *warnings*.

    Sections missing from *gate_config* are treated as empty. The checks run
    in the order: required metrics, regression gates, absolute minimums.
    """
    required = gate_config.get("required_metrics", [])
    _check_required_metrics(run, run_name, required, failures)

    regression = gate_config.get("regression", {})
    _check_regression_gates(run, baseline, run_name, regression, failures, warnings)

    minimums = gate_config.get("absolute_minimums", {})
    _check_absolute_minimums(run, run_name, minimums, failures, warnings)
200
+
201
+
202
def evaluate_quality_gates(
    current_runs: list[dict[str, Any]],
    baseline_runs: list[dict[str, Any]],
    gate_config: dict[str, Any],
) -> dict[str, Any]:
    """Evaluate the configured gates for every current run that has a baseline.

    Both run lists are aggregated by identity first. A current run with no
    baseline of the same identity only produces a warning and is not checked
    further. Returns a report with ``passed``, ``failures``, ``warnings``,
    ``current_count`` and ``baseline_count``.
    """
    current = aggregate_runs(current_runs)

    baseline_by_identity: dict[tuple[str, str, str, str], dict[str, Any]] = {}
    for baseline_run in aggregate_runs(baseline_runs):
        baseline_by_identity[_identity(baseline_run)] = baseline_run

    failures: list[str] = []
    warnings: list[str] = []

    for run in current:
        matched = baseline_by_identity.get(_identity(run))
        label = _identity_string(run)
        if matched is not None:
            _check_metrics(run, matched, label, gate_config, failures, warnings)
        else:
            warnings.append(f"No baseline run matched: {label}")

    return {
        "passed": not failures,
        "failures": failures,
        "warnings": warnings,
        "current_count": len(current),
        "baseline_count": len(baseline_by_identity),
    }
231
+
232
+
233
def format_gate_report(report: dict[str, Any]) -> str:
    """Render a gate report as multi-line text.

    The output holds a header, the run counts, an optional warnings section,
    and either the list of failures or a line stating that all gates passed.
    """
    out = ["\nQUALITY GATE REPORT", "-" * 60]
    out.append(f"Current runs : {report.get('current_count', 0)}")
    out.append(f"Baseline runs: {report.get('baseline_count', 0)}")

    noted = report.get("warnings", [])
    failed = report.get("failures", [])

    if noted:
        out.append("Warnings:")
        out += [f" - {w}" for w in noted]

    if not failed:
        out.append("All configured quality gates passed.")
    else:
        out.append("Failures:")
        out += [f" - {f}" for f in failed]

    return "\n".join(out)
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Terminal reporting helpers for benchmark results.
4
+
5
+ Pure presentation logic — no internal package dependencies.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+
13
def print_result(result: dict[str, Any], final_k: int) -> None:
    """Print a single profile/engine benchmark result.

    Prints hit rate and MRR at *final_k*, the RAGAS metrics when the result
    carries ``faithfulness``, latency, RSS delta, routing policy, the
    per-bucket MRR when ``bucket_metrics`` is non-empty, and one line per
    entry of ``per_query``.
    """
    print(f" Hit Rate (HR@{final_k}) : {result['hit_rate']:.1%}")
    print(f" MRR@{final_k} : {result['mrr']:.3f}")
    if "faithfulness" in result:
        print(f" Faithfulness : {result['faithfulness']:.3f}")
        # The RAGAS section is keyed on faithfulness, but the companion
        # metrics may be missing from a partial evaluation: print only the
        # ones that are present instead of raising KeyError.
        if "context_recall" in result:
            print(f" Context Recall : {result['context_recall']:.3f}")
        if "context_precision" in result:
            print(f" Context Precision : {result['context_precision']:.3f}")
    print(f" Avg latency : {result['avg_latency_s']:.3f}s")
    print(f" Model RSS delta : {result['model_rss_mb']:.0f} MB")
    print(f" Routing policy : {result['routing_policy']}")
    if result.get("bucket_metrics"):
        for bucket_name, metrics in result["bucket_metrics"].items():
            print(
                f" {bucket_name.title()} MRR : {metrics.get('mrr', 0.0):.3f} "
                f"(n={metrics.get('n', 0)})"
            )

    print("\n Per-query results:")
    for pq in result["per_query"]:
        status = "HIT " if pq["hit"] else "MISS"
        # Queries are truncated to 70 characters to keep lines readable.
        print(
            f" {status} [{pq['latency_s']:.2f}s] [{pq['bucket']}] "
            f"[{pq['selected_engine']}] {pq['query'][:70]}"
        )
38
+
39
+
40
def print_summary_table(
    all_results: list[dict[str, Any]], *, final_k: int = 5
) -> None:
    """Print a side-by-side comparison table of all benchmark results.

    Nothing is printed for fewer than two results. When any result carries
    ``faithfulness`` the RAGAS columns are shown (missing values print as
    0.000); otherwise latency and RSS columns are shown.

    Args:
        all_results: benchmark result dicts, one table row each.
        final_k: cut-off shown in the ``HR@k`` / ``MRR@k`` column headers.
            Defaults to 5, the value the header used to hard-code.
    """
    if len(all_results) <= 1:
        return

    print(f"\n{'=' * 60}")
    print("SUMMARY")
    print("=" * 60)

    hit_rate_label = f"HR@{final_k}"
    mrr_label = f"MRR@{final_k}"
    leading_columns = (
        f"{'Profile':<10} {'Engine':<15} {hit_rate_label:>8} {mrr_label:>8} "
    )

    has_ragas = any("faithfulness" in r for r in all_results)
    if has_ragas:
        header = (
            leading_columns
            + f"{'Faith':>8} {'Recall':>8} {'Prec':>8} {'Latency':>10}"
        )
    else:
        header = leading_columns + f"{'Latency':>10} {'RSS MB':>10}"
    print(header)
    print("-" * len(header))

    for result in all_results:
        if has_ragas:
            print(
                f"{result['profile']:<10} {result['engine']:<15} "
                f"{result['hit_rate']:>7.1%} "
                f"{result['mrr']:>8.3f} "
                f"{result.get('faithfulness', 0.0):>8.3f} "
                f"{result.get('context_recall', 0.0):>8.3f} "
                f"{result.get('context_precision', 0.0):>8.3f} "
                f"{result['avg_latency_s']:>9.3f}s"
            )
        else:
            print(
                f"{result['profile']:<10} {result['engine']:<15} "
                f"{result['hit_rate']:>7.1%} "
                f"{result['mrr']:>8.3f} "
                f"{result['avg_latency_s']:>9.3f}s "
                f"{result['model_rss_mb']:>9.0f}"
            )
80
+
81
+
82
def print_autoresearch_summary(all_results: list[dict[str, Any]]) -> None:
    """Print the autoresearch summary, highlighting the best structural config.

    Only runs whose ``engine`` is ``"pageindex-pilot"`` are considered; the
    one with the highest structural-bucket MRR is reported (the first such
    run on ties). Nothing is printed when there are no such runs.
    """

    def _structural_mrr(run: dict[str, Any]) -> float:
        # ``bucket_metrics``, its "structural" entry or the MRR itself may be
        # missing or None; all of those count as an MRR of 0.0.
        structural = (run.get("bucket_metrics") or {}).get("structural") or {}
        return structural.get("mrr") or 0.0

    if not all_results:
        return

    pageindex_runs = [r for r in all_results if r.get("engine") == "pageindex-pilot"]
    if not pageindex_runs:
        return

    structural_best = max(pageindex_runs, key=_structural_mrr)
    structural_mrr = _structural_mrr(structural_best)
    print("\nAUTORESEARCH SUMMARY")
    print("-" * 60)
    print(
        f"Best structural config: {structural_best.get('pilot_config') or 'base'} "
        f"(MRR={structural_mrr:.3f})"
    )
    print(
        "Stop condition: if the structural MRR no longer improves across the "
        "pilot configs, "
        "the structure-aware ceiling has been reached."
    )