@researai/deepscientist 1.5.7 → 1.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +186 -21
- package/README.md +8 -4
- package/bin/ds.js +224 -9
- package/docs/en/00_QUICK_START.md +2 -2
- package/docs/en/07_MEMORY_AND_MCP.md +40 -3
- package/docs/en/99_ACKNOWLEDGEMENTS.md +1 -0
- package/docs/zh/00_QUICK_START.md +2 -2
- package/docs/zh/07_MEMORY_AND_MCP.md +40 -3
- package/docs/zh/99_ACKNOWLEDGEMENTS.md +1 -0
- package/install.sh +34 -0
- package/package.json +2 -2
- package/pyproject.toml +2 -2
- package/src/deepscientist/__init__.py +1 -1
- package/src/deepscientist/acp/envelope.py +1 -0
- package/src/deepscientist/artifact/metrics.py +814 -83
- package/src/deepscientist/artifact/schemas.py +1 -0
- package/src/deepscientist/artifact/service.py +2001 -229
- package/src/deepscientist/bash_exec/monitor.py +1 -1
- package/src/deepscientist/bash_exec/service.py +17 -9
- package/src/deepscientist/channels/qq.py +17 -0
- package/src/deepscientist/channels/relay.py +16 -0
- package/src/deepscientist/config/models.py +6 -0
- package/src/deepscientist/config/service.py +70 -2
- package/src/deepscientist/daemon/api/handlers.py +414 -14
- package/src/deepscientist/daemon/api/router.py +4 -0
- package/src/deepscientist/daemon/app.py +292 -21
- package/src/deepscientist/gitops/diff.py +6 -10
- package/src/deepscientist/mcp/server.py +191 -40
- package/src/deepscientist/prompts/builder.py +65 -19
- package/src/deepscientist/quest/node_traces.py +129 -2
- package/src/deepscientist/quest/service.py +140 -34
- package/src/deepscientist/quest/stage_views.py +175 -33
- package/src/deepscientist/registries/baseline.py +56 -4
- package/src/deepscientist/runners/codex.py +1 -1
- package/src/prompts/connectors/qq.md +1 -1
- package/src/prompts/contracts/shared_interaction.md +14 -0
- package/src/prompts/system.md +113 -32
- package/src/skills/analysis-campaign/SKILL.md +10 -14
- package/src/skills/baseline/SKILL.md +51 -38
- package/src/skills/baseline/references/baseline-plan-template.md +2 -0
- package/src/skills/decision/SKILL.md +12 -8
- package/src/skills/experiment/SKILL.md +28 -16
- package/src/skills/experiment/references/main-experiment-plan-template.md +2 -0
- package/src/skills/figure-polish/SKILL.md +1 -0
- package/src/skills/finalize/SKILL.md +3 -8
- package/src/skills/idea/SKILL.md +18 -8
- package/src/skills/idea/references/literature-survey-template.md +24 -0
- package/src/skills/idea/references/related-work-playbook.md +4 -0
- package/src/skills/idea/references/selection-gate.md +9 -0
- package/src/skills/intake-audit/SKILL.md +2 -8
- package/src/skills/rebuttal/SKILL.md +2 -8
- package/src/skills/review/SKILL.md +2 -8
- package/src/skills/scout/SKILL.md +2 -8
- package/src/skills/write/SKILL.md +53 -17
- package/src/skills/write/templates/DEEPSCIENTIST_NOTES.md +21 -0
- package/src/skills/write/templates/README.md +408 -0
- package/src/skills/write/templates/UPSTREAM_LICENSE.txt +21 -0
- package/src/skills/write/templates/aaai2026/README.md +534 -0
- package/src/skills/write/templates/aaai2026/aaai2026-unified-supp.tex +144 -0
- package/src/skills/write/templates/aaai2026/aaai2026-unified-template.tex +952 -0
- package/src/skills/write/templates/aaai2026/aaai2026.bib +111 -0
- package/src/skills/write/templates/aaai2026/aaai2026.bst +1493 -0
- package/src/skills/write/templates/aaai2026/aaai2026.sty +315 -0
- package/src/skills/write/templates/acl/README.md +50 -0
- package/src/skills/write/templates/acl/acl.sty +312 -0
- package/src/skills/write/templates/acl/acl_latex.tex +377 -0
- package/src/skills/write/templates/acl/acl_lualatex.tex +101 -0
- package/src/skills/write/templates/acl/acl_natbib.bst +1940 -0
- package/src/skills/write/templates/acl/anthology.bib.txt +26 -0
- package/src/skills/write/templates/acl/custom.bib +70 -0
- package/src/skills/write/templates/acl/formatting.md +326 -0
- package/src/skills/write/templates/asplos2027/main.tex +459 -0
- package/src/skills/write/templates/asplos2027/references.bib +135 -0
- package/src/skills/write/templates/colm2025/README.md +3 -0
- package/src/skills/write/templates/colm2025/colm2025_conference.bib +11 -0
- package/src/skills/write/templates/colm2025/colm2025_conference.bst +1440 -0
- package/src/skills/write/templates/colm2025/colm2025_conference.sty +218 -0
- package/src/skills/write/templates/colm2025/colm2025_conference.tex +305 -0
- package/src/skills/write/templates/colm2025/fancyhdr.sty +485 -0
- package/src/skills/write/templates/colm2025/math_commands.tex +508 -0
- package/src/skills/write/templates/colm2025/natbib.sty +1246 -0
- package/src/skills/write/templates/iclr2026/fancyhdr.sty +485 -0
- package/src/skills/write/templates/iclr2026/iclr2026_conference.bib +24 -0
- package/src/skills/write/templates/iclr2026/iclr2026_conference.bst +1440 -0
- package/src/skills/write/templates/iclr2026/iclr2026_conference.sty +246 -0
- package/src/skills/write/templates/iclr2026/iclr2026_conference.tex +414 -0
- package/src/skills/write/templates/iclr2026/math_commands.tex +508 -0
- package/src/skills/write/templates/iclr2026/natbib.sty +1246 -0
- package/src/skills/write/templates/icml2026/algorithm.sty +79 -0
- package/src/skills/write/templates/icml2026/algorithmic.sty +201 -0
- package/src/skills/write/templates/icml2026/example_paper.bib +75 -0
- package/src/skills/write/templates/icml2026/example_paper.tex +662 -0
- package/src/skills/write/templates/icml2026/fancyhdr.sty +864 -0
- package/src/skills/write/templates/icml2026/icml2026.bst +1443 -0
- package/src/skills/write/templates/icml2026/icml2026.sty +767 -0
- package/src/skills/write/templates/neurips2025/Makefile +36 -0
- package/src/skills/write/templates/neurips2025/extra_pkgs.tex +53 -0
- package/src/skills/write/templates/neurips2025/main.tex +38 -0
- package/src/skills/write/templates/neurips2025/neurips.sty +382 -0
- package/src/skills/write/templates/nsdi2027/main.tex +426 -0
- package/src/skills/write/templates/nsdi2027/references.bib +151 -0
- package/src/skills/write/templates/nsdi2027/usenix-2020-09.sty +83 -0
- package/src/skills/write/templates/osdi2026/main.tex +429 -0
- package/src/skills/write/templates/osdi2026/references.bib +150 -0
- package/src/skills/write/templates/osdi2026/usenix-2020-09.sty +83 -0
- package/src/skills/write/templates/sosp2026/main.tex +532 -0
- package/src/skills/write/templates/sosp2026/references.bib +148 -0
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AiManusChatView-BS3V4ZOk.js → AiManusChatView-BKZ103sn.js} +110 -14
- package/src/ui/dist/assets/{AnalysisPlugin-DLPXQsmr.js → AnalysisPlugin-mTTzGAlK.js} +1 -1
- package/src/ui/dist/assets/{AutoFigurePlugin-C-Fr9knQ.js → AutoFigurePlugin-C_wWw4AP.js} +5 -5
- package/src/ui/dist/assets/{CliPlugin-Dd8AHzFg.js → CliPlugin-BH58n3GY.js} +9 -9
- package/src/ui/dist/assets/{CodeEditorPlugin-Dg-RepTl.js → CodeEditorPlugin-BKGRUH7e.js} +8 -8
- package/src/ui/dist/assets/{CodeViewerPlugin-D2J_3nyt.js → CodeViewerPlugin-BMADwFWJ.js} +5 -5
- package/src/ui/dist/assets/{DocViewerPlugin-ChRLLKNb.js → DocViewerPlugin-ZOnTIHLN.js} +3 -3
- package/src/ui/dist/assets/{GitDiffViewerPlugin-DgHfcved.js → GitDiffViewerPlugin-CQ7h1Djm.js} +830 -86
- package/src/ui/dist/assets/{ImageViewerPlugin-C89GZMBy.js → ImageViewerPlugin-GVS5MsnC.js} +5 -5
- package/src/ui/dist/assets/{LabCopilotPanel-BUfIwUcb.js → LabCopilotPanel-BZNv1JML.js} +10 -10
- package/src/ui/dist/assets/{LabPlugin-zvUmQUMq.js → LabPlugin-TWcJsdQA.js} +1 -1
- package/src/ui/dist/assets/{LatexPlugin-C1SSNuWp.js → LatexPlugin-DIjHiR2x.js} +7 -7
- package/src/ui/dist/assets/{MarkdownViewerPlugin-D2Mf5tU5.js → MarkdownViewerPlugin-D3ooGAH0.js} +4 -4
- package/src/ui/dist/assets/{MarketplacePlugin-CF4LgiS2.js → MarketplacePlugin-DfVfE9hN.js} +3 -3
- package/src/ui/dist/assets/{NotebookEditor-BM7Bgwlv.js → NotebookEditor-DDl0_Mc0.js} +1 -1
- package/src/ui/dist/assets/{index-Be0NAmh8.js → NotebookEditor-s8JhzuX1.js} +12 -155
- package/src/ui/dist/assets/{PdfLoader-Bc5qfD-Z.js → PdfLoader-C2Sf6SJM.js} +1 -1
- package/src/ui/dist/assets/{PdfMarkdownPlugin-sh1-IRcp.js → PdfMarkdownPlugin-CXFLoIsa.js} +3 -3
- package/src/ui/dist/assets/{PdfViewerPlugin-C_a7CpWG.js → PdfViewerPlugin-BYTmz2fK.js} +10 -10
- package/src/ui/dist/assets/{SearchPlugin-L4z3HcLf.js → SearchPlugin-CjWBI1O9.js} +1 -1
- package/src/ui/dist/assets/{Stepper-Dk4aQ3fN.js → Stepper-B0Dd8CxK.js} +1 -1
- package/src/ui/dist/assets/{TextViewerPlugin-BsNtlKVo.js → TextViewerPlugin-DdOBU3-S.js} +4 -4
- package/src/ui/dist/assets/{VNCViewer-BpeDcZ5_.js → VNCViewer-B8HGgLwQ.js} +9 -9
- package/src/ui/dist/assets/{bibtex-C4QI-bbj.js → bibtex-CKaefIN2.js} +1 -1
- package/src/ui/dist/assets/{code-DuMINRsg.js → code-BWAY76JP.js} +1 -1
- package/src/ui/dist/assets/{file-content-C3N-432K.js → file-content-C1NwU5oQ.js} +1 -1
- package/src/ui/dist/assets/{file-diff-panel-CffQ4ZMg.js → file-diff-panel-CywslwB9.js} +1 -1
- package/src/ui/dist/assets/{file-socket-CRH59PCO.js → file-socket-B4kzuOBQ.js} +1 -1
- package/src/ui/dist/assets/{file-utils-vYGtW2mI.js → file-utils-H2fjA46S.js} +1 -1
- package/src/ui/dist/assets/{image-DBVGaooo.js → image-D-NZM-6P.js} +1 -1
- package/src/ui/dist/assets/{index-B1P6hQRJ.js → index-7Chr1g9c.js} +3734 -1862
- package/src/ui/dist/assets/{index-DjSFDmgB.js → index-BdM1Gqfr.js} +2 -2
- package/src/ui/dist/assets/{index-BpjYH9Vg.js → index-CDxNdQdz.js} +1 -1
- package/src/ui/dist/assets/{index-Do9N28uB.css → index-DGIYDuTv.css} +163 -34
- package/src/ui/dist/assets/index-DHZJ_0TI.js +159 -0
- package/src/ui/dist/assets/{message-square-BsPDBhiY.js → message-square-BzjLiXir.js} +1 -1
- package/src/ui/dist/assets/{monaco-BTkdPojV.js → monaco-Cb2uKKe6.js} +1 -1
- package/src/ui/dist/assets/{popover-cWjCk-vc.js → popover-Bg72DGgT.js} +1 -1
- package/src/ui/dist/assets/{project-sync-CXn530xb.js → project-sync-Ce_0BglY.js} +1 -1
- package/src/ui/dist/assets/{sigma-04Jr12jg.js → sigma-DPaACDrh.js} +1 -1
- package/src/ui/dist/assets/{tooltip-BdVDl0G5.js → tooltip-C_mA6R0w.js} +1 -1
- package/src/ui/dist/assets/{trash-CB_GlQyC.js → trash-BvTgE5__.js} +1 -1
- package/src/ui/dist/assets/{useCliAccess-BL932NwS.js → useCliAccess-CgPeMOwP.js} +1 -1
- package/src/ui/dist/assets/{useFileDiffOverlay-B2WK7Tvq.js → useFileDiffOverlay-xPhz7P5B.js} +1 -1
- package/src/ui/dist/assets/{wrap-text-YC68g12z.js → wrap-text-C3Un3YQr.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-C0RJvFiJ.js → zoom-out-BgxLa0Ri.js} +1 -1
- package/src/ui/dist/index.html +5 -2
- /package/src/ui/dist/assets/{index-CccQYZjX.css → NotebookEditor-CccQYZjX.css} +0 -0
|
@@ -4,6 +4,27 @@ from collections import OrderedDict
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
|
|
7
|
+
class MetricContractValidationError(ValueError):
    """Raised when a metric contract or metric submission fails validation.

    Besides the human-readable message, the error carries a machine-readable
    ``error_code`` and a ``details`` mapping so callers can convert the
    failure into a structured response with :meth:`as_payload`.

    Args:
        message: Human-readable description of the failure.
        error_code: Stable identifier for the failure category.
        details: Optional extra fields to expose alongside the error.
    """

    def __init__(
        self,
        message: str,
        *,
        error_code: str = "metric_contract_validation_failed",
        details: dict[str, Any] | None = None,
    ) -> None:
        super().__init__(message)
        self.error_code = error_code
        # Shallow-copy so that later mutation of the caller's dict cannot
        # retroactively change what this error reports.
        self.details: dict[str, Any] = dict(details) if details else {}

    def as_payload(self) -> dict[str, Any]:
        """Return the error as a flat dict.

        The reserved keys ``ok``, ``error_code`` and ``message`` come first
        and always win; entries from ``details`` are appended after them in
        their original order and can never overwrite a reserved key (e.g. a
        stray ``details["ok"] = True`` must not make a failure look like a
        success).
        """
        payload: dict[str, Any] = {
            "ok": False,
            "error_code": self.error_code,
            "message": str(self),
        }
        for key, value in self.details.items():
            # setdefault keeps the reserved fields authoritative.
            payload.setdefault(key, value)
        return payload
|
|
26
|
+
|
|
27
|
+
|
|
7
28
|
def as_metric_id(value: object, *, fallback: str | None = None) -> str:
|
|
8
29
|
text = str(value or "").strip()
|
|
9
30
|
if text:
|
|
@@ -34,6 +55,17 @@ def infer_metric_direction(metric_id: str) -> str:
|
|
|
34
55
|
return "maximize"
|
|
35
56
|
|
|
36
57
|
|
|
58
|
+
def normalize_metric_direction(value: object, *, metric_id: str | None = None) -> str:
    """Map a free-form direction label onto ``"maximize"`` or ``"minimize"``.

    The label is lower-cased, trimmed, and has hyphens and spaces replaced by
    underscores before being matched against the known aliases. When it is not
    recognized, the direction is delegated to ``infer_metric_direction`` if a
    ``metric_id`` is given; otherwise ``"maximize"`` is returned.
    """
    token = str(value or "").strip().lower()
    for separator in ("-", " "):
        token = token.replace(separator, "_")

    maximize_aliases = {
        "maximize",
        "max",
        "higher",
        "higher_better",
        "more_is_better",
        "greater_is_better",
    }
    minimize_aliases = {
        "minimize",
        "min",
        "lower",
        "lower_better",
        "less_is_better",
        "smaller_is_better",
    }

    if token in maximize_aliases:
        return "maximize"
    if token in minimize_aliases:
        return "minimize"
    # Unknown label: fall back to the id-based heuristic when possible.
    return infer_metric_direction(metric_id) if metric_id else "maximize"
|
|
67
|
+
|
|
68
|
+
|
|
37
69
|
def normalize_metrics_summary(summary: object) -> dict[str, Any]:
|
|
38
70
|
if not isinstance(summary, dict):
|
|
39
71
|
return {}
|
|
@@ -46,6 +78,409 @@ def normalize_metrics_summary(summary: object) -> dict[str, Any]:
|
|
|
46
78
|
return normalized
|
|
47
79
|
|
|
48
80
|
|
|
81
|
+
def flatten_metric_leaf_map(summary: object, *, separator: str = ".") -> dict[str, Any]:
    """Flatten a nested dict into ``{joined_path: leaf_value}``.

    Keys are stringified and stripped; entries whose key is falsy or blank
    after stripping are dropped together with their subtree. Nested dicts are
    descended depth-first in insertion order and every non-dict value becomes
    a leaf keyed by its path joined with ``separator``. A non-dict ``summary``
    yields an empty dict.
    """

    def walk(node: dict, trail: tuple[str, ...]):
        # Yield (path, leaf) pairs for everything below ``node``.
        for raw_key, child in node.items():
            key = str(raw_key or "").strip()
            if not key:
                continue
            here = trail + (key,)
            if isinstance(child, dict):
                yield from walk(child, here)
            else:
                yield separator.join(here), child

    if not isinstance(summary, dict):
        return {}

    leaves: dict[str, Any] = {}
    for path, leaf in walk(summary, ()):
        # On a path collision the later value wins but the first position is kept.
        leaves[path] = leaf
    return leaves
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _resolve_origin_path_value(summary: object, origin_path: object) -> Any:
|
|
105
|
+
if not isinstance(summary, dict):
|
|
106
|
+
return None
|
|
107
|
+
normalized_path = str(origin_path or "").strip().replace("/", ".")
|
|
108
|
+
if not normalized_path:
|
|
109
|
+
return None
|
|
110
|
+
current: Any = summary
|
|
111
|
+
for part in normalized_path.split("."):
|
|
112
|
+
normalized_part = str(part or "").strip()
|
|
113
|
+
if not normalized_part:
|
|
114
|
+
continue
|
|
115
|
+
if not isinstance(current, dict) or normalized_part not in current:
|
|
116
|
+
return None
|
|
117
|
+
current = current[normalized_part]
|
|
118
|
+
return current
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _metric_explanation_fields(metric: dict[str, Any]) -> dict[str, str | None]:
|
|
122
|
+
description = str(metric.get("description") or metric.get("explanation") or "").strip() or None
|
|
123
|
+
derivation = str(metric.get("derivation") or metric.get("how_derived") or "").strip() or None
|
|
124
|
+
source_ref = str(metric.get("source_ref") or metric.get("source") or "").strip() or None
|
|
125
|
+
origin_path = str(metric.get("origin_path") or metric.get("source_path") or "").strip() or None
|
|
126
|
+
return {
|
|
127
|
+
"description": description,
|
|
128
|
+
"derivation": derivation,
|
|
129
|
+
"source_ref": source_ref,
|
|
130
|
+
"origin_path": origin_path,
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def resolve_metric_value_from_summary(
    metric_id: str,
    *,
    metrics_summary: object = None,
    primary_metric: object = None,
    origin_path: object = None,
) -> float | None:
    """Find the numeric value of ``metric_id`` using three fallbacks in order.

    1. The entry keyed by the metric id in ``normalize_metrics_summary(metrics_summary)``.
    2. The value reached by following ``origin_path`` inside the raw
       ``metrics_summary``.
    3. ``primary_metric["value"]`` when ``primary_metric`` is a dict whose
       ``metric_id``/``name``/``id`` equals the requested id.

    Every candidate is passed through ``to_number``; the first one that is not
    ``None`` is returned. A blank ``metric_id`` or an exhausted fallback chain
    yields ``None``. A ``primary_metric`` that is not a dict (for instance a
    bare id string) carries no value and therefore never contributes.
    """
    wanted_id = str(metric_id or "").strip()
    if not wanted_id:
        return None

    # 1) Direct hit in the normalized summary.
    flat_summary = normalize_metrics_summary(metrics_summary)
    candidate = to_number(flat_summary.get(wanted_id))
    if candidate is not None:
        return candidate

    # 2) Explicit origin path into the raw (un-normalized) summary.
    candidate = to_number(_resolve_origin_path_value(metrics_summary, origin_path))
    if candidate is not None:
        return candidate

    # 3) Value attached to a matching primary-metric dict.
    if not isinstance(primary_metric, dict):
        return None
    primary_id = str(
        primary_metric.get("metric_id") or primary_metric.get("name") or primary_metric.get("id") or ""
    ).strip()
    if primary_id != wanted_id:
        return None
    return to_number(primary_metric.get("value"))
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def canonicalize_baseline_submission(
    *,
    metric_contract: object,
    metrics_summary: object = None,
    primary_metric: object = None,
) -> dict[str, Any]:
    """Build the canonical view of a baseline metric submission.

    The contract is normalized with ``normalize_metric_contract`` (without the
    summary, so only declared metrics and the primary metric are considered).
    Then:

    * If the raw contract declares a non-empty ``metrics`` list, each
      normalized entry is resolved to a number via
      ``resolve_metric_value_from_summary``. Required entries without a value
      are recorded in ``unresolved_metric_ids``.
    * Otherwise every metric returned by ``extract_numeric_metric_map`` for the
      summary is accepted as-is, with empty explanation fields.

    Returns:
        A dict with ``metric_contract``, ``metrics_summary`` (id -> value),
        ``metric_details`` (one record per metric), ``unresolved_metric_ids``
        and ``source_leaf_map`` (the flattened raw summary).
    """
    contract_payload = metric_contract if isinstance(metric_contract, dict) else {}
    declared = contract_payload.get("metrics")
    has_explicit_metrics = isinstance(declared, list) and bool(declared)

    normalized_contract = normalize_metric_contract(
        contract_payload,
        metrics_summary=None,
        primary_metric=primary_metric,
    )

    resolved_values: dict[str, float] = {}
    detail_records: list[dict[str, Any]] = []
    unresolved_ids: list[str] = []

    if has_explicit_metrics:
        for entry in normalized_contract.get("metrics", []):
            if not isinstance(entry, dict):
                continue
            metric_id = str(entry.get("metric_id") or "").strip()
            if not metric_id:
                continue

            explanation = _metric_explanation_fields(entry)
            resolved = resolve_metric_value_from_summary(
                metric_id,
                metrics_summary=metrics_summary,
                primary_metric=primary_metric,
                origin_path=explanation.get("origin_path"),
            )
            is_required = bool(entry.get("required", True))

            # Start from the contract entry, then layer the canonical fields on top.
            record: dict[str, Any] = dict(entry)
            record["metric_id"] = metric_id
            record["required"] = is_required
            record.update(explanation)
            record["value"] = resolved

            if resolved is not None:
                resolved_values[metric_id] = resolved
            elif is_required:
                unresolved_ids.append(metric_id)
            detail_records.append(record)
    else:
        # No declared metrics: fall back to whatever numeric metrics the summary exposes.
        summary_values = extract_numeric_metric_map(metrics_summary=metrics_summary)
        for metric_id, number in summary_values.items():
            resolved_values[metric_id] = number
            detail_records.append(
                {
                    "metric_id": metric_id,
                    "required": True,
                    "description": None,
                    "derivation": None,
                    "source_ref": None,
                    "origin_path": None,
                    "value": number,
                }
            )

    return {
        "metric_contract": normalized_contract,
        "metrics_summary": resolved_values,
        "metric_details": detail_records,
        "unresolved_metric_ids": unresolved_ids,
        "source_leaf_map": flatten_metric_leaf_map(metrics_summary),
    }
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def validate_baseline_metric_contract_submission(
    *,
    metric_contract: object,
    metrics_summary: object = None,
    primary_metric: object = None,
) -> dict[str, Any]:
    """Canonicalize a baseline submission and reject it if it is incomplete.

    Checks are applied in this order, each raising
    ``MetricContractValidationError`` with its own ``error_code``:

    1. ``baseline_metric_contract_missing_entries`` - the normalized contract
       has no ``metrics`` entries.
    2. ``baseline_metric_values_missing`` - a required metric has no value.
    3. ``baseline_metric_explanations_missing`` - a metric lacks a
       description, a derivation or origin path, or a source reference.
    4. ``baseline_metric_contract_empty`` - no numeric metric was produced.

    Returns:
        The dict produced by ``canonicalize_baseline_submission`` when every
        check passes.
    """
    canonical = canonicalize_baseline_submission(
        metric_contract=metric_contract,
        metrics_summary=metrics_summary,
        primary_metric=primary_metric,
    )
    contract = canonical["metric_contract"]
    detail_records = canonical["metric_details"]
    resolved_values = canonical["metrics_summary"]

    def source_paths() -> list[str]:
        # Sorted leaf paths of the raw summary, reported with every failure.
        return sorted(canonical["source_leaf_map"].keys())

    declared = contract.get("metrics")
    if not (isinstance(declared, list) and declared):
        raise MetricContractValidationError(
            "Baseline metric contract must define explicit metric entries for every canonical metric.",
            error_code="baseline_metric_contract_missing_entries",
            details={
                "validation_stage": "baseline",
                "baseline_metric_ids": [],
                "baseline_metric_details": detail_records,
                "source_metric_paths": source_paths(),
            },
        )

    unresolved_ids = canonical["unresolved_metric_ids"]
    if unresolved_ids:
        raise MetricContractValidationError(
            "Baseline metric contract is missing canonical values for one or more required metrics.",
            error_code="baseline_metric_values_missing",
            details={
                "validation_stage": "baseline",
                "missing_metric_ids": unresolved_ids,
                "baseline_metric_ids": list(resolved_values.keys()),
                "baseline_metric_details": detail_records,
                "source_metric_paths": source_paths(),
            },
        )

    def is_blank(record: dict[str, Any], key: str) -> bool:
        return not str(record.get(key) or "").strip()

    explanation_gaps: list[dict[str, Any]] = []
    for record in detail_records:
        if not isinstance(record, dict):
            continue
        gaps: list[str] = []
        if is_blank(record, "description"):
            gaps.append("description")
        # Either a derivation or an origin path is sufficient.
        if is_blank(record, "derivation") and is_blank(record, "origin_path"):
            gaps.append("derivation_or_origin_path")
        if is_blank(record, "source_ref"):
            gaps.append("source_ref")
        if gaps:
            explanation_gaps.append(
                {
                    "metric_id": record.get("metric_id"),
                    "missing_fields": gaps,
                    "detail": record,
                }
            )

    if explanation_gaps:
        raise MetricContractValidationError(
            "Baseline metric contract must explain every canonical metric with description, derivation/origin path, and source reference.",
            error_code="baseline_metric_explanations_missing",
            details={
                "validation_stage": "baseline",
                "baseline_metric_ids": list(resolved_values.keys()),
                "baseline_metric_details": detail_records,
                "missing_explanations": explanation_gaps,
                "source_metric_paths": source_paths(),
            },
        )

    if not resolved_values:
        raise MetricContractValidationError(
            "Baseline metric contract did not yield any canonical numeric metrics.",
            error_code="baseline_metric_contract_empty",
            details={
                "validation_stage": "baseline",
                "baseline_metric_ids": [],
                "baseline_metric_details": detail_records,
                "source_metric_paths": source_paths(),
            },
        )

    return canonical
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def validate_main_experiment_against_baseline_contract(
    *,
    baseline_contract_payload: object,
    run_metric_contract: object = None,
    metric_rows: object = None,
    metrics_summary: object = None,
    dataset_scope: object = None,
) -> dict[str, Any]:
    """Check a main-experiment run against the canonical baseline contract.

    The run must report every required (non-supplementary) baseline metric,
    use the same optimization direction for each, and stay on the baseline's
    evaluation scope / protocol.

    Args:
        baseline_contract_payload: Canonical baseline payload; expected to be
            a dict with ``metric_contract``, ``metrics_summary`` and
            optionally ``primary_metric``.
        run_metric_contract: Metric contract declared by the run. Anything
            that is not a dict is treated as "no evaluation protocol declared".
        metric_rows: Metric rows reported by the run.
        metrics_summary: Metrics summary reported by the run.
        dataset_scope: Fallback scope id used when neither the baseline
            protocol nor a metric row names one.

    Returns:
        A dict with ``baseline_metric_ids``, ``baseline_metric_details`` and
        ``extra_metric_ids`` (run metrics that are not required baseline
        metrics).

    Raises:
        MetricContractValidationError: ``baseline_metric_contract_json_missing``
            when the baseline payload is absent, ``baseline_metric_contract_empty``
            when it exposes no required numeric metric, and
            ``main_experiment_metric_validation_failed`` for missing metrics,
            direction mismatches, or scope / protocol mismatches.
    """
    baseline_payload = baseline_contract_payload if isinstance(baseline_contract_payload, dict) else {}
    if not baseline_payload:
        raise MetricContractValidationError(
            "Canonical baseline metric contract JSON is missing, so main-experiment metric validation cannot run.",
            error_code="baseline_metric_contract_json_missing",
            details={
                "validation_stage": "main_experiment",
                "baseline_metric_ids": [],
                "baseline_metric_details": [],
            },
        )

    baseline_metrics_summary = extract_numeric_metric_map(metrics_summary=baseline_payload.get("metrics_summary"))
    baseline_contract = normalize_metric_contract(
        baseline_payload.get("metric_contract"),
        metrics_summary=baseline_metrics_summary,
        primary_metric=baseline_payload.get("primary_metric"),
    )
    baseline_details: list[dict[str, Any]] = []
    required_metric_ids: list[str] = []
    baseline_meta_map = extract_metric_meta_map(
        metric_contract=baseline_contract,
        metrics_summary=baseline_metrics_summary,
    )
    for metric in baseline_contract.get("metrics", []):
        if not isinstance(metric, dict):
            continue
        metric_id = str(metric.get("metric_id") or "").strip()
        # Only metrics that actually have a baseline number can be compared.
        if not metric_id or metric_id not in baseline_metrics_summary:
            continue
        baseline_details.append(
            {
                **metric,
                **_metric_explanation_fields(metric),
                "metric_id": metric_id,
                "baseline_value": baseline_metrics_summary.get(metric_id),
            }
        )
        if bool(metric.get("required", True)) and not bool(metric.get("supplementary", False)):
            required_metric_ids.append(metric_id)

    if not required_metric_ids:
        raise MetricContractValidationError(
            "Canonical baseline metric contract does not expose any required numeric metrics for comparison.",
            error_code="baseline_metric_contract_empty",
            details={
                "validation_stage": "main_experiment",
                "baseline_metric_ids": [],
                "baseline_metric_details": baseline_details,
            },
        )

    # Set copy for O(1) membership tests; the list keeps the reporting order.
    required_lookup = set(required_metric_ids)

    run_numeric_metrics = extract_numeric_metric_map(metric_rows=metric_rows, metrics_summary=metrics_summary)
    run_meta_map = extract_metric_meta_map(
        metric_contract=run_metric_contract,
        metric_rows=metric_rows,
        metrics_summary=metrics_summary,
    )
    missing_metric_ids = [metric_id for metric_id in required_metric_ids if metric_id not in run_numeric_metrics]
    extra_metric_ids = [metric_id for metric_id in run_numeric_metrics.keys() if metric_id not in required_lookup]

    direction_mismatches: list[dict[str, Any]] = []
    for metric_id in required_metric_ids:
        if metric_id not in run_numeric_metrics:
            continue
        baseline_direction = normalize_metric_direction(
            (baseline_meta_map.get(metric_id) or {}).get("direction"),
            metric_id=metric_id,
        )
        run_direction = normalize_metric_direction(
            (run_meta_map.get(metric_id) or {}).get("direction"),
            metric_id=metric_id,
        )
        if baseline_direction != run_direction:
            direction_mismatches.append(
                {
                    "metric_id": metric_id,
                    "expected_direction": baseline_direction,
                    "actual_direction": run_direction,
                }
            )

    baseline_protocol = baseline_contract.get("evaluation_protocol")
    expected_eval = dict(baseline_protocol) if isinstance(baseline_protocol, dict) else {}
    # The parameter is typed ``object``: guard against truthy non-dict values
    # (e.g. a string or list), which would otherwise fail on ``.get``.
    run_contract = run_metric_contract if isinstance(run_metric_contract, dict) else {}
    run_protocol = run_contract.get("evaluation_protocol")
    actual_eval = dict(run_protocol) if isinstance(run_protocol, dict) else {}

    expected_scope = str(
        expected_eval.get("scope_id")
        or expected_eval.get("dataset_scope")
        or dataset_scope
        or ""
    ).strip() or None
    actual_scopes = sorted(
        {
            str(row.get("scope_id") or row.get("scope") or dataset_scope or "").strip()
            for row in normalize_metric_rows(metric_rows, metrics_summary=metrics_summary)
            if isinstance(row, dict) and str(row.get("metric_id") or "").strip() in required_lookup
        }
        - {""}
    )
    scope_mismatch = bool(expected_scope and actual_scopes and any(scope != expected_scope for scope in actual_scopes))

    eval_protocol_mismatch: dict[str, Any] | None = None
    if expected_eval and actual_eval:
        expected_code_hashes = expected_eval.get("code_hashes") if isinstance(expected_eval.get("code_hashes"), dict) else {}
        actual_code_hashes = actual_eval.get("code_hashes") if isinstance(actual_eval.get("code_hashes"), dict) else {}
        expected_code_paths = expected_eval.get("code_paths") if isinstance(expected_eval.get("code_paths"), list) else []
        actual_code_paths = actual_eval.get("code_paths") if isinstance(actual_eval.get("code_paths"), list) else []
        expected_protocol_scope = str(expected_eval.get("scope_id") or expected_eval.get("dataset_scope") or "").strip()
        actual_protocol_scope = str(actual_eval.get("scope_id") or actual_eval.get("dataset_scope") or "").strip()

        # Hashes and paths are compared only when both sides declare them.
        protocol_scope_differs = bool(expected_protocol_scope) and expected_protocol_scope != actual_protocol_scope
        hashes_differ = bool(expected_code_hashes and actual_code_hashes and expected_code_hashes != actual_code_hashes)
        paths_differ = bool(expected_code_paths and actual_code_paths and expected_code_paths != actual_code_paths)
        if protocol_scope_differs or hashes_differ or paths_differ:
            eval_protocol_mismatch = {
                "expected": expected_eval,
                "actual": actual_eval,
            }

    if missing_metric_ids or direction_mismatches or scope_mismatch or eval_protocol_mismatch:
        details: dict[str, Any] = {
            "validation_stage": "main_experiment",
            "baseline_metric_ids": required_metric_ids,
            "baseline_metric_details": baseline_details,
            "missing_metric_ids": missing_metric_ids,
            "extra_metric_ids": extra_metric_ids,
        }
        if direction_mismatches:
            details["direction_mismatches"] = direction_mismatches

        # Row-level scope mismatches and protocol mismatches share one key.
        # Merge them so that neither report overwrites the other when both
        # occur; with only one of them present the payload is unchanged.
        protocol_report: dict[str, Any] = {}
        if scope_mismatch:
            protocol_report["expected_scope_id"] = expected_scope
            protocol_report["actual_scope_ids"] = actual_scopes
        if eval_protocol_mismatch:
            protocol_report.update(eval_protocol_mismatch)
        if protocol_report:
            details["evaluation_protocol_mismatch"] = protocol_report

        raise MetricContractValidationError(
            "Main experiment must cover every required baseline metric and stay aligned with the canonical evaluation contract.",
            error_code="main_experiment_metric_validation_failed",
            details=details,
        )

    return {
        "baseline_metric_ids": required_metric_ids,
        "baseline_metric_details": baseline_details,
        "extra_metric_ids": extra_metric_ids,
    }
|
|
482
|
+
|
|
483
|
+
|
|
49
484
|
def _normalize_metric_entry(metric: object, *, fallback_id: str | None = None) -> dict[str, Any]:
|
|
50
485
|
if isinstance(metric, str):
|
|
51
486
|
metric_id = as_metric_id(metric, fallback=fallback_id)
|
|
@@ -71,9 +506,7 @@ def _normalize_metric_entry(metric: object, *, fallback_id: str | None = None) -
|
|
|
71
506
|
metric_id = as_metric_id(
|
|
72
507
|
metric.get("metric_id") or metric.get("id") or metric.get("name") or fallback_id,
|
|
73
508
|
)
|
|
74
|
-
direction =
|
|
75
|
-
if direction not in {"maximize", "minimize"}:
|
|
76
|
-
direction = infer_metric_direction(metric_id)
|
|
509
|
+
direction = normalize_metric_direction(metric.get("direction"), metric_id=metric_id)
|
|
77
510
|
decimals_raw = metric.get("decimals")
|
|
78
511
|
decimals = int(decimals_raw) if isinstance(decimals_raw, int) else None
|
|
79
512
|
chart_group = str(metric.get("chart_group") or "default").strip() or "default"
|
|
@@ -93,6 +526,7 @@ def normalize_metric_contract(
|
|
|
93
526
|
*,
|
|
94
527
|
baseline_id: str | None = None,
|
|
95
528
|
metrics_summary: object = None,
|
|
529
|
+
metric_rows: object = None,
|
|
96
530
|
primary_metric: object = None,
|
|
97
531
|
baseline_variants: object = None,
|
|
98
532
|
) -> dict[str, Any]:
|
|
@@ -104,7 +538,7 @@ def normalize_metric_contract(
|
|
|
104
538
|
normalized = _normalize_metric_entry(metric, fallback_id=f"metric_{index + 1}")
|
|
105
539
|
metrics_by_id[normalized["metric_id"]] = normalized
|
|
106
540
|
|
|
107
|
-
summary_metrics =
|
|
541
|
+
summary_metrics = extract_numeric_metric_map(metric_rows=metric_rows, metrics_summary=metrics_summary)
|
|
108
542
|
for metric_id in summary_metrics.keys():
|
|
109
543
|
metrics_by_id.setdefault(metric_id, _normalize_metric_entry({}, fallback_id=metric_id))
|
|
110
544
|
|
|
@@ -112,7 +546,7 @@ def normalize_metric_contract(
|
|
|
112
546
|
for variant in baseline_variants:
|
|
113
547
|
if not isinstance(variant, dict):
|
|
114
548
|
continue
|
|
115
|
-
for metric_id in
|
|
549
|
+
for metric_id in extract_numeric_metric_map(metrics_summary=variant.get("metrics_summary")).keys():
|
|
116
550
|
metrics_by_id.setdefault(metric_id, _normalize_metric_entry({}, fallback_id=metric_id))
|
|
117
551
|
|
|
118
552
|
primary_metric_id = str(contract_payload.get("primary_metric_id") or "").strip()
|
|
@@ -131,7 +565,13 @@ def normalize_metric_contract(
|
|
|
131
565
|
if primary_metric_id:
|
|
132
566
|
metrics_by_id.setdefault(primary_metric_id, _normalize_metric_entry({}, fallback_id=primary_metric_id))
|
|
133
567
|
|
|
568
|
+
preserved_top_level = {
|
|
569
|
+
key: value
|
|
570
|
+
for key, value in contract_payload.items()
|
|
571
|
+
if key not in {"contract_id", "primary_metric_id", "metrics"}
|
|
572
|
+
}
|
|
134
573
|
return {
|
|
574
|
+
**preserved_top_level,
|
|
135
575
|
"contract_id": str(contract_payload.get("contract_id") or baseline_id or "default").strip() or "default",
|
|
136
576
|
"primary_metric_id": primary_metric_id or None,
|
|
137
577
|
"metrics": list(metrics_by_id.values()),
|
|
@@ -152,10 +592,10 @@ def selected_baseline_metrics(entry: dict[str, Any] | None, selected_variant_id:
|
|
|
152
592
|
if selected_variant is None and variants:
|
|
153
593
|
selected_variant = next((item for item in variants if isinstance(item, dict)), None)
|
|
154
594
|
if isinstance(selected_variant, dict):
|
|
155
|
-
summary =
|
|
595
|
+
summary = extract_numeric_metric_map(metrics_summary=selected_variant.get("metrics_summary"))
|
|
156
596
|
if summary:
|
|
157
597
|
return summary
|
|
158
|
-
return
|
|
598
|
+
return extract_numeric_metric_map(metrics_summary=entry.get("metrics_summary"))
|
|
159
599
|
|
|
160
600
|
|
|
161
601
|
def baseline_metric_lines(entry: dict[str, Any] | None, selected_variant_id: str | None = None) -> list[dict[str, Any]]:
|
|
@@ -169,9 +609,8 @@ def baseline_metric_lines(entry: dict[str, Any] | None, selected_variant_id: str
|
|
|
169
609
|
if not isinstance(variant, dict):
|
|
170
610
|
continue
|
|
171
611
|
variant_id = str(variant.get("variant_id") or "").strip() or None
|
|
172
|
-
metrics_summary =
|
|
612
|
+
metrics_summary = extract_numeric_metric_map(metrics_summary=variant.get("metrics_summary"))
|
|
173
613
|
for metric_id, value in metrics_summary.items():
|
|
174
|
-
numeric_value = to_number(value)
|
|
175
614
|
lines.append(
|
|
176
615
|
{
|
|
177
616
|
"metric_id": metric_id,
|
|
@@ -179,14 +618,13 @@ def baseline_metric_lines(entry: dict[str, Any] | None, selected_variant_id: str
|
|
|
179
618
|
"baseline_id": baseline_id,
|
|
180
619
|
"variant_id": variant_id,
|
|
181
620
|
"selected": bool(selected_id and variant_id == selected_id),
|
|
182
|
-
"value":
|
|
621
|
+
"value": value,
|
|
183
622
|
"raw_value": value,
|
|
184
623
|
}
|
|
185
624
|
)
|
|
186
625
|
if lines:
|
|
187
626
|
return lines
|
|
188
|
-
for metric_id, value in
|
|
189
|
-
numeric_value = to_number(value)
|
|
627
|
+
for metric_id, value in extract_numeric_metric_map(metrics_summary=entry.get("metrics_summary")).items():
|
|
190
628
|
lines.append(
|
|
191
629
|
{
|
|
192
630
|
"metric_id": metric_id,
|
|
@@ -194,7 +632,7 @@ def baseline_metric_lines(entry: dict[str, Any] | None, selected_variant_id: str
|
|
|
194
632
|
"baseline_id": baseline_id,
|
|
195
633
|
"variant_id": None,
|
|
196
634
|
"selected": True,
|
|
197
|
-
"value":
|
|
635
|
+
"value": value,
|
|
198
636
|
"raw_value": value,
|
|
199
637
|
}
|
|
200
638
|
)
|
|
@@ -240,29 +678,252 @@ def normalize_metric_rows(
|
|
|
240
678
|
return rows
|
|
241
679
|
|
|
242
680
|
|
|
681
|
+
def extract_numeric_metric_map(
    *,
    metric_rows: object = None,
    metrics_summary: object = None,
) -> dict[str, float]:
    """Build a ``metric_id -> number`` mapping from metric rows and a summary.

    Rows returned by ``normalize_metric_rows`` are consulted first; entries of
    ``normalize_metrics_summary`` only fill in metric ids the rows did not
    provide.  Any entry with an empty id, or for which ``to_number`` yields
    ``None``, is left out.

    Args:
        metric_rows: Raw metric rows, forwarded to ``normalize_metric_rows``.
        metrics_summary: Raw metrics summary, forwarded to both normalizers.

    Returns:
        A new dict in first-seen order of metric ids.
    """
    collected: dict[str, float] = {}

    # Pass 1: normalized rows.  A later row with the same id replaces the
    # value but keeps the id's original position in the mapping.
    for entry in normalize_metric_rows(metric_rows, metrics_summary=metrics_summary):
        if not isinstance(entry, dict):
            continue
        identifier = str(entry.get("metric_id") or "").strip()
        # "value" is only consulted when the row has no "numeric_value" key.
        number = to_number(entry.get("numeric_value", entry.get("value")))
        if identifier and number is not None:
            collected[identifier] = number

    # Pass 2: the summary never overrides what the rows already supplied.
    for identifier, raw in normalize_metrics_summary(metrics_summary).items():
        number = to_number(raw)
        if not identifier or number is None or identifier in collected:
            continue
        collected[identifier] = number

    return collected
|
|
701
|
+
|
|
702
|
+
|
|
703
|
+
def extract_metric_raw_value_map(
    *,
    metric_rows: object = None,
    metrics_summary: object = None,
) -> dict[str, Any]:
    """Build a ``metric_id -> raw value`` mapping without numeric coercion.

    The ``"value"`` field of each normalized row is taken as-is; summary
    entries are used only for metric ids that no row supplied.

    Args:
        metric_rows: Raw metric rows, forwarded to ``normalize_metric_rows``.
        metrics_summary: Raw metrics summary, forwarded to both normalizers.

    Returns:
        A new dict in first-seen order of metric ids.
    """
    raw_by_id: dict[str, Any] = {}

    for entry in normalize_metric_rows(metric_rows, metrics_summary=metrics_summary):
        if not isinstance(entry, dict):
            continue
        identifier = str(entry.get("metric_id") or "").strip()
        if identifier:
            raw_by_id[identifier] = entry.get("value")

    # Row values win; the summary only fills the gaps.
    for identifier, raw in normalize_metrics_summary(metrics_summary).items():
        raw_by_id.setdefault(identifier, raw)

    return raw_by_id
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
def extract_metric_meta_map(
    *,
    metric_contract: object = None,
    metric_rows: object = None,
    metrics_summary: object = None,
) -> dict[str, dict[str, Any]]:
    """Assemble per-metric display metadata keyed by metric id.

    Metadata is layered from three sources, in this order:

    1. the ``"metrics"`` entries of the normalized metric contract;
    2. the normalized metric rows, whose label/direction/unit/decimals/
       chart_group take precedence over the contract where the row has them;
    3. a default entry for any metric that has a numeric value but no
       metadata yet.

    Args:
        metric_contract: Raw contract, forwarded to ``normalize_metric_contract``.
        metric_rows: Raw metric rows.
        metrics_summary: Raw metrics summary.

    Returns:
        A new dict mapping each metric id to its metadata dict.
    """
    normalized_contract = normalize_metric_contract(
        metric_contract,
        metrics_summary=metrics_summary,
        metric_rows=metric_rows,
    )
    by_id: dict[str, dict[str, Any]] = {}

    # Layer 1: what the contract declares.
    for declared in normalized_contract.get("metrics", []):
        if not isinstance(declared, dict):
            continue
        identifier = str(declared.get("metric_id") or "").strip()
        if not identifier:
            continue
        entry = dict(declared)
        entry["metric_id"] = identifier
        entry["direction"] = normalize_metric_direction(declared.get("direction"), metric_id=identifier)
        entry["label"] = str(declared.get("label") or identifier).strip() or identifier
        by_id[identifier] = entry

    # Layer 2: row-level overrides on top of the contract (or a default entry).
    for row in normalize_metric_rows(metric_rows, metrics_summary=metrics_summary):
        if not isinstance(row, dict):
            continue
        identifier = str(row.get("metric_id") or "").strip()
        if not identifier:
            continue
        base = dict(by_id.get(identifier) or _normalize_metric_entry({}, fallback_id=identifier))
        row_decimals = row.get("decimals")
        overrides = {
            "metric_id": identifier,
            "label": str(row.get("label") or row.get("name") or base.get("label") or identifier).strip()
            or identifier,
            "direction": normalize_metric_direction(
                row.get("direction") or base.get("direction"),
                metric_id=identifier,
            ),
            "unit": str(row.get("unit") or base.get("unit") or "").strip() or None,
            # Only an int row value replaces the inherited decimals.
            "decimals": row_decimals if isinstance(row_decimals, int) else base.get("decimals"),
            "chart_group": str(row.get("chart_group") or base.get("chart_group") or "default").strip()
            or "default",
        }
        base.update(overrides)
        by_id[identifier] = base

    # Layer 3: make sure every numeric metric has at least a default entry.
    for identifier in extract_numeric_metric_map(metric_rows=metric_rows, metrics_summary=metrics_summary):
        by_id.setdefault(identifier, _normalize_metric_entry({}, fallback_id=identifier))

    return by_id
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def extract_metric_comparison_map(
    baseline_comparisons: object,
) -> dict[str, dict[str, Any]]:
    """Index baseline-comparison items by their stripped ``metric_id``.

    Args:
        baseline_comparisons: Expected to be a dict with an ``"items"`` list
            of per-metric comparison dicts.  Anything else is treated as
            "no comparisons".

    Returns:
        A dict mapping each non-blank, stripped ``metric_id`` to its item
        dict.  The item objects are the ones found in the input, not copies.
        When several items share an id, the last one wins.
    """
    if not isinstance(baseline_comparisons, dict):
        return {}

    # "items" may be present but None (or otherwise malformed); iterating it
    # directly would raise, so only real sequences are accepted.
    items = baseline_comparisons.get("items")
    if not isinstance(items, (list, tuple)):
        return {}

    indexed: dict[str, dict[str, Any]] = {}
    for item in items:
        if not isinstance(item, dict):
            continue
        metric_id = str(item.get("metric_id") or "").strip()
        # Check after stripping so a whitespace-only id is not stored under "".
        if metric_id:
            indexed[metric_id] = item
    return indexed
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def extract_metric_delta_map(
    *,
    metric_rows: object = None,
    baseline_comparisons: object = None,
) -> dict[str, float]:
    """Build a ``metric_id -> delta`` mapping.

    Deltas from the baseline-comparison items take precedence; the ``"delta"``
    field of a normalized metric row is used only for metric ids the
    comparisons did not cover.  Deltas for which ``to_number`` yields ``None``
    are skipped.

    Args:
        metric_rows: Raw metric rows, forwarded to ``normalize_metric_rows``.
        baseline_comparisons: Comparison payload, forwarded to
            ``extract_metric_comparison_map``.

    Returns:
        A new dict of numeric deltas keyed by metric id.
    """
    deltas: dict[str, float] = {}

    for identifier, comparison in extract_metric_comparison_map(baseline_comparisons).items():
        number = to_number(comparison.get("delta"))
        if number is None:
            continue
        deltas[identifier] = number

    # Rows act purely as a fallback for ids without a comparison delta.
    for entry in normalize_metric_rows(metric_rows):
        if not isinstance(entry, dict):
            continue
        identifier = str(entry.get("metric_id") or "").strip()
        if identifier and identifier not in deltas:
            number = to_number(entry.get("delta"))
            if number is not None:
                deltas[identifier] = number

    return deltas
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
def resolve_primary_metric_id(
    *,
    metric_contract: object = None,
    metric_rows: object = None,
    metrics_summary: object = None,
    primary_metric: object = None,
    progress_eval: object = None,
    baseline_comparisons: object = None,
) -> str | None:
    """Pick the primary metric id among the metrics that have a numeric value.

    Candidates are tried in this order and the first one that has a numeric
    value is returned:

    1. ``progress_eval["primary_metric_id"]``
    2. ``baseline_comparisons["primary_metric_id"]``
    3. the normalized contract's ``primary_metric_id``
    4. ``primary_metric`` itself (a string, or a dict's
       ``metric_id`` / ``name`` / ``id``)

    If none of them matches, the first numeric metric is used.

    Returns:
        The chosen metric id, or ``None`` when there are no numeric metrics.
    """
    available = extract_numeric_metric_map(metric_rows=metric_rows, metrics_summary=metrics_summary)
    if not available:
        return None

    normalized_contract = normalize_metric_contract(
        metric_contract,
        metrics_summary=metrics_summary,
        metric_rows=metric_rows,
        primary_metric=primary_metric,
    )

    hinted = [
        progress_eval.get("primary_metric_id") if isinstance(progress_eval, dict) else None,
        baseline_comparisons.get("primary_metric_id") if isinstance(baseline_comparisons, dict) else None,
        normalized_contract.get("primary_metric_id"),
    ]
    ordered = [text for text in (str(hint or "").strip() for hint in hinted) if text]

    # The explicit primary_metric argument is the lowest-priority hint.
    explicit = ""
    if isinstance(primary_metric, dict):
        explicit = str(
            primary_metric.get("metric_id") or primary_metric.get("name") or primary_metric.get("id") or ""
        ).strip()
    elif isinstance(primary_metric, str):
        explicit = primary_metric.strip()
    if explicit:
        ordered.append(explicit)

    chosen = next((name for name in ordered if name in available), None)
    if chosen is not None:
        return chosen
    # No hint matched: fall back to the first metric with a numeric value.
    return next(iter(available), None)
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def extract_latest_metric(payload: dict[str, Any] | None) -> dict[str, Any] | None:
    """Summarize the primary metric of a payload as a small dict.

    Args:
        payload: A dict that may carry ``metric_rows``, ``metrics_summary``,
            ``metric_contract``, ``primary_metric``, ``progress_eval`` and
            ``baseline_comparisons``.

    Returns:
        ``{"key": ..., "value": ...}`` plus, when available,
        ``delta_vs_baseline``, ``label``, ``direction``, ``unit`` and
        ``decimals``.  ``None`` when the payload is not a non-empty dict, has
        no numeric metrics, or no primary metric value can be resolved.
    """
    if not isinstance(payload, dict) or not payload:
        return None

    rows = payload.get("metric_rows")
    summary = payload.get("metrics_summary")
    contract = payload.get("metric_contract")
    comparisons = payload.get("baseline_comparisons")

    values = extract_numeric_metric_map(metric_rows=rows, metrics_summary=summary)
    if not values:
        return None

    primary_id = resolve_primary_metric_id(
        metric_contract=contract,
        metric_rows=rows,
        metrics_summary=summary,
        primary_metric=payload.get("primary_metric"),
        progress_eval=payload.get("progress_eval"),
        baseline_comparisons=comparisons,
    )
    if not primary_id:
        return None
    primary_value = values.get(primary_id)
    if primary_value is None:
        return None

    meta = (
        extract_metric_meta_map(
            metric_contract=contract,
            metric_rows=rows,
            metrics_summary=summary,
        ).get(primary_id)
        or {}
    )
    deltas = extract_metric_delta_map(metric_rows=rows, baseline_comparisons=comparisons)

    summary_item: dict[str, Any] = {"key": primary_id, "value": primary_value}
    if primary_id in deltas:
        summary_item["delta_vs_baseline"] = deltas[primary_id]
    # Truthy-only fields: empty labels/directions/units are omitted.
    for field in ("label", "direction", "unit"):
        if meta.get(field):
            summary_item[field] = meta[field]
    # decimals may legitimately be 0, so it is tested against None instead.
    if meta.get("decimals") is not None:
        summary_item["decimals"] = meta["decimals"]
    return summary_item
|
|
898
|
+
|
|
899
|
+
|
|
243
900
|
def compare_with_baseline(
|
|
244
901
|
*,
|
|
245
902
|
metrics_summary: object,
|
|
903
|
+
metric_rows: object = None,
|
|
246
904
|
metric_contract: object,
|
|
247
905
|
baseline_metrics: object,
|
|
248
906
|
) -> dict[str, Any]:
|
|
249
|
-
run_summary =
|
|
250
|
-
baseline_summary =
|
|
251
|
-
contract = normalize_metric_contract(metric_contract, metrics_summary=run_summary)
|
|
907
|
+
run_summary = extract_numeric_metric_map(metric_rows=metric_rows, metrics_summary=metrics_summary)
|
|
908
|
+
baseline_summary = extract_numeric_metric_map(metrics_summary=baseline_metrics)
|
|
909
|
+
contract = normalize_metric_contract(metric_contract, metrics_summary=run_summary, metric_rows=metric_rows)
|
|
252
910
|
items: list[dict[str, Any]] = []
|
|
253
|
-
|
|
911
|
+
metric_meta = extract_metric_meta_map(
|
|
912
|
+
metric_contract=contract,
|
|
913
|
+
metric_rows=metric_rows,
|
|
914
|
+
metrics_summary=run_summary,
|
|
915
|
+
)
|
|
916
|
+
metric_ids = [
|
|
917
|
+
metric_id
|
|
918
|
+
for metric_id in metric_meta.keys()
|
|
919
|
+
if metric_id in run_summary or metric_id in baseline_summary
|
|
920
|
+
]
|
|
254
921
|
for metric_id in baseline_summary.keys():
|
|
255
922
|
if metric_id not in metric_ids:
|
|
256
923
|
metric_ids.append(metric_id)
|
|
257
924
|
for metric_id in run_summary.keys():
|
|
258
925
|
if metric_id not in metric_ids:
|
|
259
926
|
metric_ids.append(metric_id)
|
|
260
|
-
|
|
261
|
-
metric_meta = {
|
|
262
|
-
item["metric_id"]: item
|
|
263
|
-
for item in contract.get("metrics", [])
|
|
264
|
-
if isinstance(item, dict) and item.get("metric_id")
|
|
265
|
-
}
|
|
266
927
|
for metric_id in metric_ids:
|
|
267
928
|
meta = metric_meta.get(metric_id) or _normalize_metric_entry({}, fallback_id=metric_id)
|
|
268
929
|
run_value = run_summary.get(metric_id)
|
|
@@ -276,7 +937,7 @@ def compare_with_baseline(
|
|
|
276
937
|
delta = run_number - baseline_number
|
|
277
938
|
if baseline_number not in {0.0, -0.0}:
|
|
278
939
|
relative_delta = delta / abs(baseline_number)
|
|
279
|
-
direction = meta.get("direction")
|
|
940
|
+
direction = normalize_metric_direction(meta.get("direction"), metric_id=metric_id)
|
|
280
941
|
if direction == "maximize":
|
|
281
942
|
better = run_number > baseline_number
|
|
282
943
|
else:
|
|
@@ -285,7 +946,7 @@ def compare_with_baseline(
|
|
|
285
946
|
{
|
|
286
947
|
"metric_id": metric_id,
|
|
287
948
|
"label": meta.get("label") or metric_id,
|
|
288
|
-
"direction": meta.get("direction")
|
|
949
|
+
"direction": normalize_metric_direction(meta.get("direction"), metric_id=metric_id),
|
|
289
950
|
"unit": meta.get("unit"),
|
|
290
951
|
"decimals": meta.get("decimals"),
|
|
291
952
|
"chart_group": meta.get("chart_group"),
|
|
@@ -299,7 +960,10 @@ def compare_with_baseline(
|
|
|
299
960
|
}
|
|
300
961
|
)
|
|
301
962
|
|
|
302
|
-
primary_metric_id =
|
|
963
|
+
primary_metric_id = resolve_primary_metric_id(
|
|
964
|
+
metric_contract=contract,
|
|
965
|
+
metrics_summary=run_summary,
|
|
966
|
+
)
|
|
303
967
|
primary_item = next((item for item in items if item["metric_id"] == primary_metric_id), None)
|
|
304
968
|
if primary_item is None and items:
|
|
305
969
|
primary_item = items[0]
|
|
@@ -372,6 +1036,60 @@ def compute_progress_eval(
|
|
|
372
1036
|
}
|
|
373
1037
|
|
|
374
1038
|
|
|
1039
|
+
def _record_sort_key(record: dict[str, Any]) -> str:
|
|
1040
|
+
return str(record.get("updated_at") or record.get("created_at") or "")
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
def _record_dedupe_key(record: dict[str, Any]) -> str:
|
|
1044
|
+
run_id = str(record.get("run_id") or "").strip()
|
|
1045
|
+
if run_id:
|
|
1046
|
+
return f"run:{run_id}"
|
|
1047
|
+
artifact_id = str(record.get("artifact_id") or "").strip()
|
|
1048
|
+
if artifact_id:
|
|
1049
|
+
return f"artifact:{artifact_id}"
|
|
1050
|
+
result_path = str(((record.get("paths") or {}) if isinstance(record.get("paths"), dict) else {}).get("result_json") or "").strip()
|
|
1051
|
+
if result_path:
|
|
1052
|
+
return f"path:{result_path}"
|
|
1053
|
+
branch_name = str(record.get("branch") or "").strip()
|
|
1054
|
+
return f"record:{branch_name}:{_record_sort_key(record)}"
|
|
1055
|
+
|
|
1056
|
+
|
|
1057
|
+
def _record_richness(record: dict[str, Any]) -> tuple[int, int, int, int, str]:
    """Score how much metric information a run record carries.

    Returns:
        A tuple compared element by element: number of numeric metrics,
        number of baseline-comparison items, ``1`` if
        ``paths["result_json"]`` is set (else ``0``), number of metrics with
        metadata, and the record's sort key as the final tie-breaker.
    """
    rows = record.get("metric_rows")
    summary = record.get("metrics_summary")

    numeric_count = len(extract_numeric_metric_map(metric_rows=rows, metrics_summary=summary))
    comparison_count = len(extract_metric_comparison_map(record.get("baseline_comparisons")))

    paths = record.get("paths")
    result_flag = int(bool(paths.get("result_json"))) if isinstance(paths, dict) else 0

    meta_count = len(
        extract_metric_meta_map(
            metric_contract=record.get("metric_contract"),
            metric_rows=rows,
            metrics_summary=summary,
        )
    )
    return (numeric_count, comparison_count, result_flag, meta_count, _record_sort_key(record))
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
def dedupe_run_records(run_records: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collapse run records that share a dedupe key, keeping the richest one.

    Non-dict entries are ignored.  Records are visited in ascending sort-key
    order; for each dedupe key the record with the greater (or equal, hence
    later) richness tuple is kept.

    Returns:
        The surviving records, sorted by their sort key.
    """
    chronological = sorted(
        (entry for entry in run_records if isinstance(entry, dict)),
        key=_record_sort_key,
    )
    winners: OrderedDict[str, dict[str, Any]] = OrderedDict()
    for candidate in chronological:
        identity = _record_dedupe_key(candidate)
        incumbent = winners.get(identity)
        # A strictly poorer candidate never displaces the incumbent; ties go
        # to the candidate, which was visited later.
        if incumbent is not None and _record_richness(candidate) < _record_richness(incumbent):
            continue
        winners[identity] = candidate
    return sorted(winners.values(), key=_record_sort_key)
|
|
1091
|
+
|
|
1092
|
+
|
|
375
1093
|
def build_metrics_timeline(
|
|
376
1094
|
*,
|
|
377
1095
|
quest_id: str,
|
|
@@ -379,81 +1097,95 @@ def build_metrics_timeline(
|
|
|
379
1097
|
baseline_entry: dict[str, Any] | None = None,
|
|
380
1098
|
selected_variant_id: str | None = None,
|
|
381
1099
|
) -> dict[str, Any]:
|
|
382
|
-
ordered_runs =
|
|
383
|
-
|
|
384
|
-
key=lambda item: str(item.get("updated_at") or item.get("created_at") or ""),
|
|
385
|
-
)
|
|
1100
|
+
ordered_runs = dedupe_run_records(run_records)
|
|
1101
|
+
baseline_metrics = selected_baseline_metrics(baseline_entry, selected_variant_id)
|
|
386
1102
|
contract = normalize_metric_contract(
|
|
387
1103
|
None,
|
|
388
1104
|
baseline_id=str((baseline_entry or {}).get("baseline_id") or ""),
|
|
389
|
-
metrics_summary=
|
|
1105
|
+
metrics_summary=baseline_metrics,
|
|
390
1106
|
primary_metric=(baseline_entry or {}).get("primary_metric"),
|
|
391
1107
|
baseline_variants=(baseline_entry or {}).get("baseline_variants"),
|
|
392
1108
|
)
|
|
1109
|
+
primary_metric_id = str(contract.get("primary_metric_id") or "").strip() or None
|
|
393
1110
|
for record in ordered_runs:
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
1111
|
+
candidate = resolve_primary_metric_id(
|
|
1112
|
+
metric_contract=record.get("metric_contract"),
|
|
1113
|
+
metric_rows=record.get("metric_rows"),
|
|
1114
|
+
metrics_summary=record.get("metrics_summary"),
|
|
1115
|
+
progress_eval=record.get("progress_eval"),
|
|
1116
|
+
baseline_comparisons=record.get("baseline_comparisons"),
|
|
1117
|
+
)
|
|
1118
|
+
if candidate:
|
|
1119
|
+
primary_metric_id = candidate
|
|
397
1120
|
break
|
|
398
1121
|
|
|
399
1122
|
series_map: OrderedDict[str, dict[str, Any]] = OrderedDict()
|
|
1123
|
+
baseline_meta_map = extract_metric_meta_map(
|
|
1124
|
+
metric_contract=(baseline_entry or {}).get("metric_contract"),
|
|
1125
|
+
metrics_summary=baseline_metrics,
|
|
1126
|
+
)
|
|
1127
|
+
|
|
1128
|
+
def ensure_series(metric_id: str, meta: dict[str, Any] | None = None) -> dict[str, Any]:
|
|
1129
|
+
resolved_meta = meta or baseline_meta_map.get(metric_id) or _normalize_metric_entry({}, fallback_id=metric_id)
|
|
1130
|
+
if metric_id not in series_map:
|
|
1131
|
+
series_map[metric_id] = {
|
|
1132
|
+
"metric_id": metric_id,
|
|
1133
|
+
"label": resolved_meta.get("label") or metric_id,
|
|
1134
|
+
"direction": normalize_metric_direction(resolved_meta.get("direction"), metric_id=metric_id),
|
|
1135
|
+
"unit": resolved_meta.get("unit"),
|
|
1136
|
+
"decimals": resolved_meta.get("decimals"),
|
|
1137
|
+
"chart_group": resolved_meta.get("chart_group"),
|
|
1138
|
+
"baselines": [],
|
|
1139
|
+
"points": [],
|
|
1140
|
+
}
|
|
1141
|
+
else:
|
|
1142
|
+
series_map[metric_id]["label"] = resolved_meta.get("label") or series_map[metric_id]["label"]
|
|
1143
|
+
series_map[metric_id]["direction"] = normalize_metric_direction(
|
|
1144
|
+
resolved_meta.get("direction") or series_map[metric_id]["direction"],
|
|
1145
|
+
metric_id=metric_id,
|
|
1146
|
+
)
|
|
1147
|
+
series_map[metric_id]["unit"] = resolved_meta.get("unit") or series_map[metric_id]["unit"]
|
|
1148
|
+
if resolved_meta.get("decimals") is not None:
|
|
1149
|
+
series_map[metric_id]["decimals"] = resolved_meta.get("decimals")
|
|
1150
|
+
series_map[metric_id]["chart_group"] = (
|
|
1151
|
+
resolved_meta.get("chart_group") or series_map[metric_id]["chart_group"]
|
|
1152
|
+
)
|
|
1153
|
+
return series_map[metric_id]
|
|
1154
|
+
|
|
400
1155
|
for metric in contract.get("metrics", []):
|
|
401
1156
|
metric_id = str(metric.get("metric_id") or "").strip()
|
|
402
1157
|
if not metric_id:
|
|
403
1158
|
continue
|
|
404
|
-
|
|
405
|
-
"metric_id": metric_id,
|
|
406
|
-
"label": metric.get("label") or metric_id,
|
|
407
|
-
"direction": metric.get("direction") or infer_metric_direction(metric_id),
|
|
408
|
-
"unit": metric.get("unit"),
|
|
409
|
-
"decimals": metric.get("decimals"),
|
|
410
|
-
"chart_group": metric.get("chart_group"),
|
|
411
|
-
"baselines": [],
|
|
412
|
-
"points": [],
|
|
413
|
-
}
|
|
1159
|
+
ensure_series(metric_id, metric)
|
|
414
1160
|
|
|
415
1161
|
for line in baseline_metric_lines(baseline_entry, selected_variant_id):
|
|
416
1162
|
metric_id = str(line.get("metric_id") or "").strip()
|
|
417
1163
|
if not metric_id:
|
|
418
1164
|
continue
|
|
419
|
-
|
|
420
|
-
metric_id,
|
|
421
|
-
{
|
|
422
|
-
"metric_id": metric_id,
|
|
423
|
-
"label": metric_id,
|
|
424
|
-
"direction": infer_metric_direction(metric_id),
|
|
425
|
-
"unit": None,
|
|
426
|
-
"decimals": None,
|
|
427
|
-
"chart_group": "default",
|
|
428
|
-
"baselines": [],
|
|
429
|
-
"points": [],
|
|
430
|
-
},
|
|
431
|
-
)
|
|
432
|
-
series_map[metric_id]["baselines"].append(line)
|
|
1165
|
+
ensure_series(metric_id).setdefault("baselines", []).append(line)
|
|
433
1166
|
|
|
434
1167
|
for index, record in enumerate(ordered_runs, start=1):
|
|
435
|
-
|
|
1168
|
+
numeric_metrics = extract_numeric_metric_map(
|
|
1169
|
+
metric_rows=record.get("metric_rows"),
|
|
1170
|
+
metrics_summary=record.get("metrics_summary"),
|
|
1171
|
+
)
|
|
1172
|
+
raw_values = extract_metric_raw_value_map(
|
|
1173
|
+
metric_rows=record.get("metric_rows"),
|
|
1174
|
+
metrics_summary=record.get("metrics_summary"),
|
|
1175
|
+
)
|
|
436
1176
|
progress = record.get("progress_eval") if isinstance(record.get("progress_eval"), dict) else {}
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
"direction": infer_metric_direction(metric_id),
|
|
450
|
-
"unit": None,
|
|
451
|
-
"decimals": None,
|
|
452
|
-
"chart_group": "default",
|
|
453
|
-
"baselines": [],
|
|
454
|
-
"points": [],
|
|
455
|
-
},
|
|
456
|
-
)
|
|
1177
|
+
comparison_by_id = extract_metric_comparison_map(record.get("baseline_comparisons"))
|
|
1178
|
+
delta_by_id = extract_metric_delta_map(
|
|
1179
|
+
metric_rows=record.get("metric_rows"),
|
|
1180
|
+
baseline_comparisons=record.get("baseline_comparisons"),
|
|
1181
|
+
)
|
|
1182
|
+
record_meta = extract_metric_meta_map(
|
|
1183
|
+
metric_contract=record.get("metric_contract"),
|
|
1184
|
+
metric_rows=record.get("metric_rows"),
|
|
1185
|
+
metrics_summary=record.get("metrics_summary"),
|
|
1186
|
+
)
|
|
1187
|
+
for metric_id, numeric_value in numeric_metrics.items():
|
|
1188
|
+
ensure_series(metric_id, record_meta.get(metric_id))
|
|
457
1189
|
comparison = comparison_by_id.get(metric_id, {})
|
|
458
1190
|
series_map[metric_id]["points"].append(
|
|
459
1191
|
{
|
|
@@ -463,9 +1195,9 @@ def build_metrics_timeline(
|
|
|
463
1195
|
"created_at": record.get("updated_at") or record.get("created_at"),
|
|
464
1196
|
"branch": record.get("branch"),
|
|
465
1197
|
"idea_id": record.get("idea_id"),
|
|
466
|
-
"value":
|
|
467
|
-
"raw_value":
|
|
468
|
-
"delta_vs_baseline":
|
|
1198
|
+
"value": numeric_value,
|
|
1199
|
+
"raw_value": raw_values.get(metric_id, numeric_value),
|
|
1200
|
+
"delta_vs_baseline": delta_by_id.get(metric_id),
|
|
469
1201
|
"relative_delta_vs_baseline": comparison.get("relative_delta"),
|
|
470
1202
|
"breakthrough": bool(progress.get("breakthrough")),
|
|
471
1203
|
"breakthrough_level": progress.get("breakthrough_level"),
|
|
@@ -473,7 +1205,6 @@ def build_metrics_timeline(
|
|
|
473
1205
|
}
|
|
474
1206
|
)
|
|
475
1207
|
|
|
476
|
-
primary_metric_id = str(contract.get("primary_metric_id") or "").strip() or None
|
|
477
1208
|
series = [item for item in series_map.values() if item["points"] or item["baselines"]]
|
|
478
1209
|
return {
|
|
479
1210
|
"quest_id": quest_id,
|