@researai/deepscientist 1.5.1 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -1
- package/bin/ds.js +2239 -153
- package/docs/en/00_QUICK_START.md +60 -20
- package/docs/en/01_SETTINGS_REFERENCE.md +20 -20
- package/docs/en/02_START_RESEARCH_GUIDE.md +11 -11
- package/docs/en/03_QQ_CONNECTOR_GUIDE.md +10 -10
- package/docs/en/05_TUI_GUIDE.md +1 -1
- package/docs/en/09_DOCTOR.md +48 -4
- package/docs/en/90_ARCHITECTURE.md +4 -2
- package/docs/zh/00_QUICK_START.md +60 -20
- package/docs/zh/01_SETTINGS_REFERENCE.md +21 -21
- package/docs/zh/02_START_RESEARCH_GUIDE.md +19 -19
- package/docs/zh/03_QQ_CONNECTOR_GUIDE.md +10 -10
- package/docs/zh/05_TUI_GUIDE.md +1 -1
- package/docs/zh/09_DOCTOR.md +46 -4
- package/install.sh +125 -8
- package/package.json +2 -1
- package/pyproject.toml +1 -1
- package/src/deepscientist/__init__.py +6 -1
- package/src/deepscientist/artifact/service.py +553 -26
- package/src/deepscientist/bash_exec/monitor.py +23 -4
- package/src/deepscientist/bash_exec/runtime.py +3 -0
- package/src/deepscientist/bash_exec/service.py +132 -4
- package/src/deepscientist/bridges/base.py +10 -19
- package/src/deepscientist/channels/discord_gateway.py +25 -2
- package/src/deepscientist/channels/feishu_long_connection.py +41 -3
- package/src/deepscientist/channels/qq.py +524 -64
- package/src/deepscientist/channels/qq_gateway.py +22 -3
- package/src/deepscientist/channels/relay.py +429 -90
- package/src/deepscientist/channels/slack_socket.py +29 -5
- package/src/deepscientist/channels/telegram_polling.py +25 -2
- package/src/deepscientist/channels/whatsapp_local_session.py +32 -4
- package/src/deepscientist/cli.py +27 -0
- package/src/deepscientist/config/models.py +6 -40
- package/src/deepscientist/config/service.py +165 -156
- package/src/deepscientist/connector_profiles.py +346 -0
- package/src/deepscientist/connector_runtime.py +88 -43
- package/src/deepscientist/daemon/api/handlers.py +65 -11
- package/src/deepscientist/daemon/api/router.py +4 -2
- package/src/deepscientist/daemon/app.py +772 -219
- package/src/deepscientist/doctor.py +69 -2
- package/src/deepscientist/gitops/diff.py +3 -0
- package/src/deepscientist/home.py +25 -2
- package/src/deepscientist/mcp/context.py +3 -1
- package/src/deepscientist/mcp/server.py +66 -7
- package/src/deepscientist/migration.py +114 -0
- package/src/deepscientist/prompts/builder.py +71 -3
- package/src/deepscientist/qq_profiles.py +186 -0
- package/src/deepscientist/quest/layout.py +1 -0
- package/src/deepscientist/quest/service.py +70 -12
- package/src/deepscientist/quest/stage_views.py +46 -0
- package/src/deepscientist/runners/codex.py +2 -0
- package/src/deepscientist/shared.py +44 -17
- package/src/prompts/connectors/lingzhu.md +3 -0
- package/src/prompts/connectors/qq.md +42 -2
- package/src/prompts/system.md +123 -10
- package/src/skills/analysis-campaign/SKILL.md +35 -6
- package/src/skills/baseline/SKILL.md +73 -32
- package/src/skills/decision/SKILL.md +4 -3
- package/src/skills/experiment/SKILL.md +28 -6
- package/src/skills/finalize/SKILL.md +5 -2
- package/src/skills/idea/SKILL.md +2 -2
- package/src/skills/intake-audit/SKILL.md +2 -2
- package/src/skills/rebuttal/SKILL.md +4 -2
- package/src/skills/review/SKILL.md +4 -2
- package/src/skills/scout/SKILL.md +2 -2
- package/src/skills/write/SKILL.md +2 -2
- package/src/tui/package.json +1 -1
- package/src/ui/dist/assets/{AiManusChatView-w5lF2Ttt.js → AiManusChatView-qzChi9uh.js} +67 -94
- package/src/ui/dist/assets/{AnalysisPlugin-DJOED79I.js → AnalysisPlugin-CcC_-UqN.js} +1 -1
- package/src/ui/dist/assets/{AutoFigurePlugin-DaG61Y0M.js → AutoFigurePlugin-DD8LkJLe.js} +5 -5
- package/src/ui/dist/assets/{CliPlugin-CV4LqUB_.js → CliPlugin-DJJFfVmW.js} +17 -110
- package/src/ui/dist/assets/{CodeEditorPlugin-DylfAea4.js → CodeEditorPlugin-CrjkHNLh.js} +8 -8
- package/src/ui/dist/assets/{CodeViewerPlugin-F7saY0LM.js → CodeViewerPlugin-obnD6G5R.js} +5 -5
- package/src/ui/dist/assets/{DocViewerPlugin-COP0c7jf.js → DocViewerPlugin-DB9SUQVd.js} +3 -3
- package/src/ui/dist/assets/{GitDiffViewerPlugin-CAS05pT9.js → GitDiffViewerPlugin-DZLlNlD2.js} +1 -1
- package/src/ui/dist/assets/{ImageViewerPlugin-Bco1CN_w.js → ImageViewerPlugin-BGwfDZ0Y.js} +5 -5
- package/src/ui/dist/assets/{LabCopilotPanel-CvMlCD99.js → LabCopilotPanel-dfLptQcR.js} +10 -10
- package/src/ui/dist/assets/{LabPlugin-BYankkE4.js → LabPlugin-CeGjAl3A.js} +1 -1
- package/src/ui/dist/assets/{LatexPlugin-LDSMR-t-.js → LatexPlugin-BBJ7kd1V.js} +7 -7
- package/src/ui/dist/assets/{MarkdownViewerPlugin-B7o80jgm.js → MarkdownViewerPlugin-DKZi7BcB.js} +4 -4
- package/src/ui/dist/assets/{MarketplacePlugin-CM6ZOcpC.js → MarketplacePlugin-C_k-9jD0.js} +3 -3
- package/src/ui/dist/assets/{NotebookEditor-Dc61cXmK.js → NotebookEditor-4R88_BMO.js} +1 -1
- package/src/ui/dist/assets/{PdfLoader-DWowuQwx.js → PdfLoader-DwEFQLrw.js} +1 -1
- package/src/ui/dist/assets/{PdfMarkdownPlugin-BsJM1q_a.js → PdfMarkdownPlugin-D-jdsqF8.js} +3 -3
- package/src/ui/dist/assets/{PdfViewerPlugin-DB2eEEFQ.js → PdfViewerPlugin-CmeBGDY0.js} +10 -10
- package/src/ui/dist/assets/{SearchPlugin-CraThSvt.js → SearchPlugin-Dlz2WKJ4.js} +1 -1
- package/src/ui/dist/assets/{Stepper-CgocRTPq.js → Stepper-ClOgzWM3.js} +1 -1
- package/src/ui/dist/assets/{TextViewerPlugin-B1JGhKtd.js → TextViewerPlugin-DDQWxibk.js} +4 -4
- package/src/ui/dist/assets/{VNCViewer-CclFC7FM.js → VNCViewer-CJXT0Nm8.js} +9 -9
- package/src/ui/dist/assets/{bibtex-D3IKsMl7.js → bibtex-DLr4Rtk4.js} +1 -1
- package/src/ui/dist/assets/{code-BP37Xx0p.js → code-DgKK408Y.js} +1 -1
- package/src/ui/dist/assets/{file-content-BAJSu-9r.js → file-content-6HBqQnvQ.js} +1 -1
- package/src/ui/dist/assets/{file-diff-panel-DUGeCTuy.js → file-diff-panel-Dhu0TbBM.js} +1 -1
- package/src/ui/dist/assets/{file-socket-CXc1Ojf7.js → file-socket-CP3iwVZG.js} +1 -1
- package/src/ui/dist/assets/{file-utils-2J21jt7M.js → file-utils-BsS-Aw68.js} +1 -1
- package/src/ui/dist/assets/{image-CMMmgvcn.js → image-ByeK-Zcv.js} +1 -1
- package/src/ui/dist/assets/{index-DmwmJmbW.js → index-BLjo5--a.js} +33610 -31016
- package/src/ui/dist/assets/{index-CWgMgpow.js → index-BdsE0uRz.js} +11 -11
- package/src/ui/dist/assets/{index-s7aHnNQ4.js → index-C-eX-N6A.js} +1 -1
- package/src/ui/dist/assets/{index-KGt-z-dD.css → index-CuQhlrR-.css} +2747 -2
- package/src/ui/dist/assets/{index-BaVumsQT.js → index-DyremSIv.js} +2 -2
- package/src/ui/dist/assets/{message-square-CQRfX0Am.js → message-square-DnagiLnc.js} +1 -1
- package/src/ui/dist/assets/{monaco-B4TbdsrF.js → monaco-4kBFeprs.js} +1 -1
- package/src/ui/dist/assets/{popover-B8Rokodk.js → popover-hRCXZzs2.js} +1 -1
- package/src/ui/dist/assets/{project-sync-D_i96KH4.js → project-sync-O_85YuP6.js} +1 -1
- package/src/ui/dist/assets/{sigma-D12PnzCN.js → sigma-DvKopSnL.js} +1 -1
- package/src/ui/dist/assets/{tooltip-B6YrI4aJ.js → tooltip-BmlPc6kc.js} +1 -1
- package/src/ui/dist/assets/{trash-Bc8jGp0V.js → trash-n-UvdZFR.js} +1 -1
- package/src/ui/dist/assets/{useCliAccess-mXVCYSZ-.js → useCliAccess-WDd3_wIh.js} +1 -1
- package/src/ui/dist/assets/{useFileDiffOverlay-Bg6b9H9K.js → useFileDiffOverlay-rXLIL2NF.js} +1 -1
- package/src/ui/dist/assets/{wrap-text-Drh5GEnL.js → wrap-text-qIYQ4a_W.js} +1 -1
- package/src/ui/dist/assets/{zoom-out-CJj9DZLn.js → zoom-out-fZXCEFsy.js} +1 -1
- package/src/ui/dist/index.html +2 -2
- package/uv.lock +1155 -0
- package/src/ui/dist/assets/LabPlugin-D9jVIo0A.css +0 -2698
|
@@ -91,6 +91,41 @@ class ArtifactService:
|
|
|
91
91
|
self.baselines = BaselineRegistry(home)
|
|
92
92
|
self.quest_service = QuestService(home)
|
|
93
93
|
|
|
94
|
+
def _normalize_evaluation_summary(self, payload: dict[str, Any] | None) -> dict[str, str] | None:
|
|
95
|
+
if not isinstance(payload, dict):
|
|
96
|
+
return None
|
|
97
|
+
normalized: dict[str, str] = {}
|
|
98
|
+
for key in (
|
|
99
|
+
"takeaway",
|
|
100
|
+
"claim_update",
|
|
101
|
+
"baseline_relation",
|
|
102
|
+
"comparability",
|
|
103
|
+
"failure_mode",
|
|
104
|
+
"next_action",
|
|
105
|
+
):
|
|
106
|
+
value = payload.get(key)
|
|
107
|
+
if value is None:
|
|
108
|
+
continue
|
|
109
|
+
text = str(value).strip()
|
|
110
|
+
if text:
|
|
111
|
+
normalized[key] = text
|
|
112
|
+
return normalized or None
|
|
113
|
+
|
|
114
|
+
def _evaluation_summary_markdown_lines(self, payload: dict[str, Any] | None) -> list[str]:
|
|
115
|
+
normalized = self._normalize_evaluation_summary(payload)
|
|
116
|
+
if not normalized:
|
|
117
|
+
return ["- Not recorded."]
|
|
118
|
+
labels = (
|
|
119
|
+
("takeaway", "Takeaway"),
|
|
120
|
+
("claim_update", "Claim Update"),
|
|
121
|
+
("baseline_relation", "Baseline Relation"),
|
|
122
|
+
("comparability", "Comparability"),
|
|
123
|
+
("failure_mode", "Failure Mode"),
|
|
124
|
+
("next_action", "Next Action"),
|
|
125
|
+
)
|
|
126
|
+
lines = [f"- {label}: {normalized[key]}" for key, label in labels if normalized.get(key)]
|
|
127
|
+
return lines or ["- Not recorded."]
|
|
128
|
+
|
|
94
129
|
def _workspace_root_for(self, quest_root: Path, workspace_root: Path | None = None) -> Path:
|
|
95
130
|
if workspace_root is not None:
|
|
96
131
|
return workspace_root
|
|
@@ -387,6 +422,207 @@ class ArtifactService:
|
|
|
387
422
|
write_json(path, normalized)
|
|
388
423
|
return normalized
|
|
389
424
|
|
|
425
|
+
def _analysis_baseline_inventory_path(self, quest_root: Path) -> Path:
|
|
426
|
+
return ensure_dir(quest_root / "artifacts" / "baselines") / "analysis_inventory.json"
|
|
427
|
+
|
|
428
|
+
def _read_analysis_baseline_inventory(self, quest_root: Path) -> dict[str, Any]:
|
|
429
|
+
path = self._analysis_baseline_inventory_path(quest_root)
|
|
430
|
+
payload = read_json(path, {})
|
|
431
|
+
if not isinstance(payload, dict):
|
|
432
|
+
payload = {}
|
|
433
|
+
entries = payload.get("entries") if isinstance(payload.get("entries"), list) else []
|
|
434
|
+
return {
|
|
435
|
+
"schema_version": 1,
|
|
436
|
+
"entries": [dict(item) for item in entries if isinstance(item, dict)],
|
|
437
|
+
"updated_at": payload.get("updated_at"),
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
def _write_analysis_baseline_inventory(self, quest_root: Path, payload: dict[str, Any]) -> dict[str, Any]:
|
|
441
|
+
path = self._analysis_baseline_inventory_path(quest_root)
|
|
442
|
+
normalized_entries = payload.get("entries") if isinstance(payload.get("entries"), list) else []
|
|
443
|
+
normalized = {
|
|
444
|
+
"schema_version": 1,
|
|
445
|
+
"entries": [dict(item) for item in normalized_entries if isinstance(item, dict)],
|
|
446
|
+
"updated_at": utc_now(),
|
|
447
|
+
}
|
|
448
|
+
write_json(path, normalized)
|
|
449
|
+
return normalized
|
|
450
|
+
|
|
451
|
+
def _normalize_baseline_root_rel_path(
|
|
452
|
+
self,
|
|
453
|
+
quest_root: Path,
|
|
454
|
+
baseline_root_rel_path: str | None,
|
|
455
|
+
*,
|
|
456
|
+
baseline_id: str | None = None,
|
|
457
|
+
) -> tuple[str | None, str | None]:
|
|
458
|
+
raw = str(baseline_root_rel_path or "").strip()
|
|
459
|
+
if not raw:
|
|
460
|
+
return None, None
|
|
461
|
+
candidate = Path(raw)
|
|
462
|
+
resolved = candidate.resolve() if candidate.is_absolute() else resolve_within(quest_root, raw)
|
|
463
|
+
if not resolved.exists():
|
|
464
|
+
raise FileNotFoundError(f"Baseline root does not exist: {resolved}")
|
|
465
|
+
try:
|
|
466
|
+
relative = resolved.relative_to(quest_root.resolve()).as_posix()
|
|
467
|
+
except ValueError as exc:
|
|
468
|
+
raise ValueError("`baseline_root_rel_path` must stay within quest_root.") from exc
|
|
469
|
+
parts = Path(relative).parts
|
|
470
|
+
if len(parts) < 3 or parts[0] != "baselines" or parts[1] not in {"local", "imported"}:
|
|
471
|
+
raise ValueError(
|
|
472
|
+
"`baseline_root_rel_path` must live under `baselines/local/<baseline_id>/...` or "
|
|
473
|
+
"`baselines/imported/<baseline_id>/...`."
|
|
474
|
+
)
|
|
475
|
+
normalized_baseline_id = str(baseline_id or parts[2]).strip() or None
|
|
476
|
+
if normalized_baseline_id and parts[2] != normalized_baseline_id:
|
|
477
|
+
raise ValueError(
|
|
478
|
+
f"`baseline_root_rel_path` points to baseline `{parts[2]}`, which does not match `{normalized_baseline_id}`."
|
|
479
|
+
)
|
|
480
|
+
return relative, parts[1]
|
|
481
|
+
|
|
482
|
+
@staticmethod
|
|
483
|
+
def _analysis_baseline_label(payload: dict[str, Any]) -> str:
|
|
484
|
+
baseline_id = str(payload.get("baseline_id") or "baseline").strip() or "baseline"
|
|
485
|
+
parts = [f"`{baseline_id}`"]
|
|
486
|
+
variant_id = str(payload.get("variant_id") or "").strip()
|
|
487
|
+
if variant_id:
|
|
488
|
+
parts.append(f"variant `{variant_id}`")
|
|
489
|
+
benchmark = str(payload.get("benchmark") or "").strip()
|
|
490
|
+
split = str(payload.get("split") or "").strip()
|
|
491
|
+
if benchmark and split:
|
|
492
|
+
parts.append(f"benchmark `{benchmark}` / split `{split}`")
|
|
493
|
+
elif benchmark:
|
|
494
|
+
parts.append(f"benchmark `{benchmark}`")
|
|
495
|
+
elif split:
|
|
496
|
+
parts.append(f"split `{split}`")
|
|
497
|
+
reason = str(payload.get("reason") or "").strip()
|
|
498
|
+
if reason:
|
|
499
|
+
parts.append(f"reason: {reason}")
|
|
500
|
+
return " · ".join(parts)
|
|
501
|
+
|
|
502
|
+
def _normalize_required_baselines(self, quest_root: Path, values: list[object] | None) -> list[dict[str, Any]]:
|
|
503
|
+
normalized: list[dict[str, Any]] = []
|
|
504
|
+
for raw in values or []:
|
|
505
|
+
if not isinstance(raw, dict):
|
|
506
|
+
continue
|
|
507
|
+
baseline_id = str(raw.get("baseline_id") or "").strip()
|
|
508
|
+
if not baseline_id:
|
|
509
|
+
continue
|
|
510
|
+
baseline_root_rel_path, storage_mode = self._normalize_baseline_root_rel_path(
|
|
511
|
+
quest_root,
|
|
512
|
+
raw.get("baseline_root_rel_path"),
|
|
513
|
+
baseline_id=baseline_id,
|
|
514
|
+
)
|
|
515
|
+
normalized.append(
|
|
516
|
+
{
|
|
517
|
+
"baseline_id": baseline_id,
|
|
518
|
+
"variant_id": str(raw.get("variant_id") or "").strip() or None,
|
|
519
|
+
"reason": str(raw.get("reason") or "").strip() or None,
|
|
520
|
+
"benchmark": str(raw.get("benchmark") or "").strip() or None,
|
|
521
|
+
"split": str(raw.get("split") or "").strip() or None,
|
|
522
|
+
"baseline_root_rel_path": baseline_root_rel_path,
|
|
523
|
+
"storage_mode": storage_mode or (str(raw.get("storage_mode") or "").strip() or None),
|
|
524
|
+
"usage_scope": "supplementary",
|
|
525
|
+
}
|
|
526
|
+
)
|
|
527
|
+
return normalized
|
|
528
|
+
|
|
529
|
+
def _normalize_comparison_baselines(self, quest_root: Path, values: list[object] | None) -> list[dict[str, Any]]:
|
|
530
|
+
normalized: list[dict[str, Any]] = []
|
|
531
|
+
for raw in values or []:
|
|
532
|
+
if not isinstance(raw, dict):
|
|
533
|
+
continue
|
|
534
|
+
baseline_id = str(raw.get("baseline_id") or "").strip()
|
|
535
|
+
if not baseline_id:
|
|
536
|
+
continue
|
|
537
|
+
baseline_root_rel_path, storage_mode = self._normalize_baseline_root_rel_path(
|
|
538
|
+
quest_root,
|
|
539
|
+
raw.get("baseline_root_rel_path"),
|
|
540
|
+
baseline_id=baseline_id,
|
|
541
|
+
)
|
|
542
|
+
metrics_summary = (
|
|
543
|
+
normalize_metrics_summary(raw.get("metrics_summary"))
|
|
544
|
+
if isinstance(raw.get("metrics_summary"), dict)
|
|
545
|
+
else {}
|
|
546
|
+
)
|
|
547
|
+
normalized.append(
|
|
548
|
+
{
|
|
549
|
+
"baseline_id": baseline_id,
|
|
550
|
+
"variant_id": str(raw.get("variant_id") or "").strip() or None,
|
|
551
|
+
"benchmark": str(raw.get("benchmark") or "").strip() or None,
|
|
552
|
+
"split": str(raw.get("split") or "").strip() or None,
|
|
553
|
+
"reason": str(raw.get("reason") or "").strip() or None,
|
|
554
|
+
"metrics_summary": metrics_summary,
|
|
555
|
+
"evidence_paths": [
|
|
556
|
+
str(item).strip() for item in (raw.get("evidence_paths") or []) if str(item).strip()
|
|
557
|
+
],
|
|
558
|
+
"baseline_root_rel_path": baseline_root_rel_path,
|
|
559
|
+
"storage_mode": storage_mode or (str(raw.get("storage_mode") or "").strip() or None),
|
|
560
|
+
"usage_scope": "supplementary",
|
|
561
|
+
"published": bool(raw.get("published", False)),
|
|
562
|
+
"published_entry_id": str(raw.get("published_entry_id") or "").strip() or None,
|
|
563
|
+
"status": str(raw.get("status") or "registered").strip() or "registered",
|
|
564
|
+
}
|
|
565
|
+
)
|
|
566
|
+
return normalized
|
|
567
|
+
|
|
568
|
+
@staticmethod
|
|
569
|
+
def _analysis_inventory_entry_key(payload: dict[str, Any]) -> tuple[str, str, str, str, str, str]:
|
|
570
|
+
origin = dict(payload.get("origin") or {}) if isinstance(payload.get("origin"), dict) else {}
|
|
571
|
+
return (
|
|
572
|
+
str(payload.get("baseline_id") or "").strip(),
|
|
573
|
+
str(payload.get("variant_id") or "").strip(),
|
|
574
|
+
str(origin.get("campaign_id") or "").strip(),
|
|
575
|
+
str(origin.get("slice_id") or "").strip(),
|
|
576
|
+
str(payload.get("benchmark") or "").strip(),
|
|
577
|
+
str(payload.get("split") or "").strip(),
|
|
578
|
+
)
|
|
579
|
+
|
|
580
|
+
@staticmethod
|
|
581
|
+
def _merge_analysis_inventory_entry(existing: dict[str, Any], incoming: dict[str, Any]) -> dict[str, Any]:
|
|
582
|
+
merged = dict(existing)
|
|
583
|
+
for key, value in incoming.items():
|
|
584
|
+
if value is None:
|
|
585
|
+
continue
|
|
586
|
+
if isinstance(value, str) and not value.strip():
|
|
587
|
+
continue
|
|
588
|
+
if isinstance(value, (list, dict)) and not value:
|
|
589
|
+
continue
|
|
590
|
+
merged[key] = value
|
|
591
|
+
merged["updated_at"] = utc_now()
|
|
592
|
+
merged.setdefault("created_at", existing.get("created_at") or incoming.get("created_at") or utc_now())
|
|
593
|
+
return merged
|
|
594
|
+
|
|
595
|
+
def _upsert_analysis_baseline_inventory(self, quest_root: Path, entries: list[dict[str, Any]]) -> dict[str, Any]:
|
|
596
|
+
inventory = self._read_analysis_baseline_inventory(quest_root)
|
|
597
|
+
existing_entries = [dict(item) for item in (inventory.get("entries") or []) if isinstance(item, dict)]
|
|
598
|
+
by_key = {
|
|
599
|
+
self._analysis_inventory_entry_key(item): dict(item)
|
|
600
|
+
for item in existing_entries
|
|
601
|
+
if str(item.get("baseline_id") or "").strip()
|
|
602
|
+
}
|
|
603
|
+
for raw in entries:
|
|
604
|
+
if not isinstance(raw, dict):
|
|
605
|
+
continue
|
|
606
|
+
entry = dict(raw)
|
|
607
|
+
if not str(entry.get("baseline_id") or "").strip():
|
|
608
|
+
continue
|
|
609
|
+
key = self._analysis_inventory_entry_key(entry)
|
|
610
|
+
current = by_key.get(key)
|
|
611
|
+
if current is None:
|
|
612
|
+
stamped = dict(entry)
|
|
613
|
+
stamped.setdefault("created_at", utc_now())
|
|
614
|
+
stamped["updated_at"] = utc_now()
|
|
615
|
+
by_key[key] = stamped
|
|
616
|
+
continue
|
|
617
|
+
by_key[key] = self._merge_analysis_inventory_entry(current, entry)
|
|
618
|
+
normalized = self._write_analysis_baseline_inventory(
|
|
619
|
+
quest_root,
|
|
620
|
+
{
|
|
621
|
+
"entries": list(by_key.values()),
|
|
622
|
+
},
|
|
623
|
+
)
|
|
624
|
+
return normalized
|
|
625
|
+
|
|
390
626
|
def _paper_root(self, quest_root: Path) -> Path:
|
|
391
627
|
return ensure_dir(quest_root / "paper")
|
|
392
628
|
|
|
@@ -405,6 +641,114 @@ class ArtifactService:
|
|
|
405
641
|
def _paper_bundle_manifest_path(self, quest_root: Path) -> Path:
|
|
406
642
|
return self._paper_root(quest_root) / "paper_bundle_manifest.json"
|
|
407
643
|
|
|
644
|
+
def _paper_baseline_inventory_path(self, quest_root: Path) -> Path:
|
|
645
|
+
return self._paper_root(quest_root) / "baseline_inventory.json"
|
|
646
|
+
|
|
647
|
+
def _open_source_root(self, quest_root: Path) -> Path:
|
|
648
|
+
return ensure_dir(quest_root / "release" / "open_source")
|
|
649
|
+
|
|
650
|
+
def _open_source_manifest_path(self, quest_root: Path) -> Path:
|
|
651
|
+
return self._open_source_root(quest_root) / "manifest.json"
|
|
652
|
+
|
|
653
|
+
def _open_source_cleanup_plan_path(self, quest_root: Path) -> Path:
|
|
654
|
+
return self._open_source_root(quest_root) / "cleanup_plan.md"
|
|
655
|
+
|
|
656
|
+
def _open_source_include_paths_path(self, quest_root: Path) -> Path:
|
|
657
|
+
return self._open_source_root(quest_root) / "include_paths.json"
|
|
658
|
+
|
|
659
|
+
def _open_source_exclude_paths_path(self, quest_root: Path) -> Path:
|
|
660
|
+
return self._open_source_root(quest_root) / "exclude_paths.json"
|
|
661
|
+
|
|
662
|
+
def _write_paper_baseline_inventory(self, quest_root: Path) -> dict[str, Any]:
|
|
663
|
+
quest_yaml = self.quest_service.read_quest_yaml(quest_root)
|
|
664
|
+
confirmed_baseline_ref = (
|
|
665
|
+
dict(quest_yaml.get("confirmed_baseline_ref") or {})
|
|
666
|
+
if isinstance(quest_yaml.get("confirmed_baseline_ref"), dict)
|
|
667
|
+
else None
|
|
668
|
+
)
|
|
669
|
+
analysis_inventory = self._read_analysis_baseline_inventory(quest_root)
|
|
670
|
+
payload = {
|
|
671
|
+
"schema_version": 1,
|
|
672
|
+
"canonical_baseline_ref": confirmed_baseline_ref,
|
|
673
|
+
"supplementary_baselines": [
|
|
674
|
+
dict(item) for item in (analysis_inventory.get("entries") or []) if isinstance(item, dict)
|
|
675
|
+
],
|
|
676
|
+
"updated_at": utc_now(),
|
|
677
|
+
}
|
|
678
|
+
write_json(self._paper_baseline_inventory_path(quest_root), payload)
|
|
679
|
+
return payload
|
|
680
|
+
|
|
681
|
+
def _ensure_open_source_prep(
|
|
682
|
+
self,
|
|
683
|
+
quest_root: Path,
|
|
684
|
+
*,
|
|
685
|
+
source_branch: str | None,
|
|
686
|
+
source_bundle_manifest_path: str,
|
|
687
|
+
baseline_inventory_path: str,
|
|
688
|
+
) -> dict[str, Any]:
|
|
689
|
+
root = self._open_source_root(quest_root)
|
|
690
|
+
cleanup_plan_path = self._open_source_cleanup_plan_path(quest_root)
|
|
691
|
+
include_paths_path = self._open_source_include_paths_path(quest_root)
|
|
692
|
+
exclude_paths_path = self._open_source_exclude_paths_path(quest_root)
|
|
693
|
+
manifest_path = self._open_source_manifest_path(quest_root)
|
|
694
|
+
if not cleanup_plan_path.exists():
|
|
695
|
+
write_text(
|
|
696
|
+
cleanup_plan_path,
|
|
697
|
+
"\n".join(
|
|
698
|
+
[
|
|
699
|
+
"# Open Source Cleanup Plan",
|
|
700
|
+
"",
|
|
701
|
+
"## Goal",
|
|
702
|
+
"",
|
|
703
|
+
"Prepare a clean public code branch from the finalized paper line.",
|
|
704
|
+
"",
|
|
705
|
+
"## Keep",
|
|
706
|
+
"",
|
|
707
|
+
"- Core training / evaluation code needed to reproduce the public results.",
|
|
708
|
+
"",
|
|
709
|
+
"## Remove Or Private",
|
|
710
|
+
"",
|
|
711
|
+
"- Temporary logs, scratch files, local secrets, and unrelated experimental debris.",
|
|
712
|
+
"",
|
|
713
|
+
"## Before Release",
|
|
714
|
+
"",
|
|
715
|
+
"- Confirm README, license, and benchmark instructions are complete.",
|
|
716
|
+
"- Confirm only necessary files remain in scope.",
|
|
717
|
+
"",
|
|
718
|
+
]
|
|
719
|
+
).rstrip()
|
|
720
|
+
+ "\n",
|
|
721
|
+
)
|
|
722
|
+
if not include_paths_path.exists():
|
|
723
|
+
write_json(include_paths_path, {"paths": []})
|
|
724
|
+
if not exclude_paths_path.exists():
|
|
725
|
+
write_json(exclude_paths_path, {"paths": []})
|
|
726
|
+
existing = read_json(manifest_path, {})
|
|
727
|
+
existing = existing if isinstance(existing, dict) else {}
|
|
728
|
+
manifest = {
|
|
729
|
+
**existing,
|
|
730
|
+
"schema_version": 1,
|
|
731
|
+
"status": str(existing.get("status") or "draft").strip() or "draft",
|
|
732
|
+
"source_branch": str(existing.get("source_branch") or source_branch or "").strip() or None,
|
|
733
|
+
"release_branch": str(existing.get("release_branch") or "").strip() or None,
|
|
734
|
+
"source_bundle_manifest_path": str(
|
|
735
|
+
existing.get("source_bundle_manifest_path") or source_bundle_manifest_path or ""
|
|
736
|
+
).strip()
|
|
737
|
+
or source_bundle_manifest_path,
|
|
738
|
+
"baseline_inventory_path": str(existing.get("baseline_inventory_path") or baseline_inventory_path or "").strip()
|
|
739
|
+
or baseline_inventory_path,
|
|
740
|
+
"cleanup_plan_path": str(existing.get("cleanup_plan_path") or "release/open_source/cleanup_plan.md").strip()
|
|
741
|
+
or "release/open_source/cleanup_plan.md",
|
|
742
|
+
"include_paths_path": str(existing.get("include_paths_path") or "release/open_source/include_paths.json").strip()
|
|
743
|
+
or "release/open_source/include_paths.json",
|
|
744
|
+
"exclude_paths_path": str(existing.get("exclude_paths_path") or "release/open_source/exclude_paths.json").strip()
|
|
745
|
+
or "release/open_source/exclude_paths.json",
|
|
746
|
+
"created_at": existing.get("created_at") or utc_now(),
|
|
747
|
+
"updated_at": utc_now(),
|
|
748
|
+
}
|
|
749
|
+
write_json(manifest_path, manifest)
|
|
750
|
+
return manifest
|
|
751
|
+
|
|
408
752
|
def _next_paper_outline_id(self, quest_root: Path) -> str:
|
|
409
753
|
max_index = 0
|
|
410
754
|
for root in (self._paper_outline_candidates_root(quest_root), self._paper_outline_revisions_root(quest_root)):
|
|
@@ -2367,6 +2711,7 @@ class ArtifactService:
|
|
|
2367
2711
|
status: str = "completed",
|
|
2368
2712
|
baseline_id: str | None = None,
|
|
2369
2713
|
baseline_variant_id: str | None = None,
|
|
2714
|
+
evaluation_summary: dict[str, Any] | None = None,
|
|
2370
2715
|
) -> dict[str, Any]:
|
|
2371
2716
|
self._require_baseline_gate_open(quest_root, action="record_main_experiment")
|
|
2372
2717
|
state = self.quest_service.read_research_state(quest_root)
|
|
@@ -2445,6 +2790,7 @@ class ArtifactService:
|
|
|
2445
2790
|
resolved_config_paths = [str(item).strip() for item in (config_paths or []) if str(item).strip()]
|
|
2446
2791
|
resolved_notes = [str(item).strip() for item in (notes or []) if str(item).strip()]
|
|
2447
2792
|
normalized_dataset_scope = str(dataset_scope or "full").strip().lower() or "full"
|
|
2793
|
+
normalized_evaluation_summary = self._normalize_evaluation_summary(evaluation_summary)
|
|
2448
2794
|
primary = comparisons.get("primary") if isinstance(comparisons, dict) else {}
|
|
2449
2795
|
primary_metric_id = str(progress_eval.get("primary_metric_id") or comparisons.get("primary_metric_id") or "").strip() or None
|
|
2450
2796
|
primary_value = primary.get("run_value") if isinstance(primary, dict) else None
|
|
@@ -2554,6 +2900,8 @@ class ArtifactService:
|
|
|
2554
2900
|
if resolved_notes:
|
|
2555
2901
|
run_lines.extend(["", "## Notes", ""])
|
|
2556
2902
|
run_lines.extend([f"- {item}" for item in resolved_notes])
|
|
2903
|
+
run_lines.extend(["", "## Evaluation Summary", ""])
|
|
2904
|
+
run_lines.extend(self._evaluation_summary_markdown_lines(normalized_evaluation_summary))
|
|
2557
2905
|
run_lines.extend(
|
|
2558
2906
|
[
|
|
2559
2907
|
"",
|
|
@@ -2602,6 +2950,7 @@ class ArtifactService:
|
|
|
2602
2950
|
key: value for key, value in comparisons.items() if key != "primary"
|
|
2603
2951
|
},
|
|
2604
2952
|
"progress_eval": progress_eval,
|
|
2953
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
2605
2954
|
"delivery_policy": delivery_policy,
|
|
2606
2955
|
"startup_contract": delivery_policy.get("startup_contract") or None,
|
|
2607
2956
|
"evidence_paths": resolved_evidence_paths,
|
|
@@ -2642,6 +2991,7 @@ class ArtifactService:
|
|
|
2642
2991
|
"recommended_next_route": delivery_policy.get("recommended_next_route"),
|
|
2643
2992
|
"changed_file_count": len(resolved_changed_files),
|
|
2644
2993
|
"evidence_count": len(resolved_evidence_paths),
|
|
2994
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
2645
2995
|
},
|
|
2646
2996
|
"delivery_policy": delivery_policy,
|
|
2647
2997
|
"startup_contract": delivery_policy.get("startup_contract") or None,
|
|
@@ -2657,6 +3007,7 @@ class ArtifactService:
|
|
|
2657
3007
|
key: value for key, value in comparisons.items() if key != "primary"
|
|
2658
3008
|
},
|
|
2659
3009
|
"progress_eval": progress_eval,
|
|
3010
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
2660
3011
|
"files_changed": resolved_changed_files,
|
|
2661
3012
|
"evidence_paths": resolved_evidence_paths,
|
|
2662
3013
|
"verdict": verdict,
|
|
@@ -2693,6 +3044,7 @@ class ArtifactService:
|
|
|
2693
3044
|
"breakthrough_level": progress_eval.get("breakthrough_level"),
|
|
2694
3045
|
"need_research_paper": delivery_policy.get("need_research_paper"),
|
|
2695
3046
|
"recommended_next_route": delivery_policy.get("recommended_next_route"),
|
|
3047
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
2696
3048
|
}
|
|
2697
3049
|
],
|
|
2698
3050
|
)
|
|
@@ -2715,6 +3067,7 @@ class ArtifactService:
|
|
|
2715
3067
|
key: value for key, value in comparisons.items() if key != "primary"
|
|
2716
3068
|
},
|
|
2717
3069
|
"progress_eval": progress_eval,
|
|
3070
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
2718
3071
|
"delivery_policy": delivery_policy,
|
|
2719
3072
|
}
|
|
2720
3073
|
|
|
@@ -2752,6 +3105,7 @@ class ArtifactService:
|
|
|
2752
3105
|
normalized_experimental_designs = self._normalize_string_list(experimental_designs)
|
|
2753
3106
|
normalized_todo_items = self._normalize_campaign_todo_items(todo_items)
|
|
2754
3107
|
slice_contexts: list[dict[str, Any]] = []
|
|
3108
|
+
inventory_entries: list[dict[str, Any]] = []
|
|
2755
3109
|
for index, raw in enumerate(slices, start=1):
|
|
2756
3110
|
slice_id = str(raw.get("slice_id") or generate_id("slice")).strip()
|
|
2757
3111
|
title = str(raw.get("title") or slice_id).strip() or slice_id
|
|
@@ -2783,6 +3137,10 @@ class ArtifactService:
|
|
|
2783
3137
|
abandonment_criteria = str(
|
|
2784
3138
|
raw.get("abandonment_criteria") or matched_todo.get("abandonment_criteria") or ""
|
|
2785
3139
|
).strip()
|
|
3140
|
+
required_baselines = self._normalize_required_baselines(
|
|
3141
|
+
quest_root,
|
|
3142
|
+
raw.get("required_baselines") or matched_todo.get("required_baselines"),
|
|
3143
|
+
)
|
|
2786
3144
|
plan_dir = ensure_dir(worktree_root / "experiments" / "analysis" / campaign_id / slice_id)
|
|
2787
3145
|
plan_path = plan_dir / "plan.md"
|
|
2788
3146
|
requirement_lines = [
|
|
@@ -2812,33 +3170,44 @@ class ArtifactService:
|
|
|
2812
3170
|
"",
|
|
2813
3171
|
str(raw.get("required_changes") or "").strip() or "TBD",
|
|
2814
3172
|
"",
|
|
2815
|
-
"## Metric Contract",
|
|
2816
|
-
"",
|
|
2817
|
-
str(raw.get("metric_contract") or "").strip() or "TBD",
|
|
2818
|
-
"",
|
|
2819
|
-
"## Environment Notes",
|
|
2820
|
-
"",
|
|
2821
|
-
str(raw.get("environment_notes") or "").strip() or "TBD",
|
|
2822
|
-
"",
|
|
2823
|
-
"## Must Not Simplify",
|
|
2824
|
-
"",
|
|
2825
|
-
str(raw.get("must_not_simplify") or "").strip() or "Full dataset / full protocol only unless explicitly approved.",
|
|
2826
|
-
"",
|
|
2827
|
-
"## Success Criteria",
|
|
2828
|
-
"",
|
|
2829
|
-
success_criteria or "TBD",
|
|
2830
|
-
"",
|
|
2831
|
-
"## Abandonment Criteria",
|
|
2832
|
-
"",
|
|
2833
|
-
abandonment_criteria or "TBD",
|
|
2834
|
-
"",
|
|
2835
|
-
"## Completion Condition",
|
|
2836
|
-
"",
|
|
2837
|
-
str(raw.get("completion_condition") or matched_todo.get("completion_condition") or "").strip()
|
|
2838
|
-
or str(raw.get("must_not_simplify") or matched_todo.get("must_not_simplify") or "").strip()
|
|
2839
|
-
or "Complete the planned analysis slice and mirror the durable result back to the parent branch.",
|
|
3173
|
+
"## Required Baselines",
|
|
2840
3174
|
"",
|
|
2841
3175
|
]
|
|
3176
|
+
if required_baselines:
|
|
3177
|
+
requirement_lines.extend([f"- {self._analysis_baseline_label(item)}" for item in required_baselines])
|
|
3178
|
+
else:
|
|
3179
|
+
requirement_lines.append("- None recorded.")
|
|
3180
|
+
requirement_lines.extend(
|
|
3181
|
+
[
|
|
3182
|
+
"",
|
|
3183
|
+
"## Metric Contract",
|
|
3184
|
+
"",
|
|
3185
|
+
str(raw.get("metric_contract") or "").strip() or "TBD",
|
|
3186
|
+
"",
|
|
3187
|
+
"## Environment Notes",
|
|
3188
|
+
"",
|
|
3189
|
+
str(raw.get("environment_notes") or "").strip() or "TBD",
|
|
3190
|
+
"",
|
|
3191
|
+
"## Must Not Simplify",
|
|
3192
|
+
"",
|
|
3193
|
+
str(raw.get("must_not_simplify") or "").strip() or "Full dataset / full protocol only unless explicitly approved.",
|
|
3194
|
+
"",
|
|
3195
|
+
"## Success Criteria",
|
|
3196
|
+
"",
|
|
3197
|
+
success_criteria or "TBD",
|
|
3198
|
+
"",
|
|
3199
|
+
"## Abandonment Criteria",
|
|
3200
|
+
"",
|
|
3201
|
+
abandonment_criteria or "TBD",
|
|
3202
|
+
"",
|
|
3203
|
+
"## Completion Condition",
|
|
3204
|
+
"",
|
|
3205
|
+
str(raw.get("completion_condition") or matched_todo.get("completion_condition") or "").strip()
|
|
3206
|
+
or str(raw.get("must_not_simplify") or matched_todo.get("must_not_simplify") or "").strip()
|
|
3207
|
+
or "Complete the planned analysis slice and mirror the durable result back to the parent branch.",
|
|
3208
|
+
"",
|
|
3209
|
+
]
|
|
3210
|
+
)
|
|
2842
3211
|
requirement_lines.extend(["## Reviewer Item IDs", ""])
|
|
2843
3212
|
if reviewer_item_ids:
|
|
2844
3213
|
requirement_lines.extend([f"- `{item}`" for item in reviewer_item_ids])
|
|
@@ -2879,9 +3248,31 @@ class ArtifactService:
|
|
|
2879
3248
|
"completion_condition": str(
|
|
2880
3249
|
raw.get("completion_condition") or matched_todo.get("completion_condition") or ""
|
|
2881
3250
|
).strip(),
|
|
3251
|
+
"required_baselines": required_baselines,
|
|
2882
3252
|
"reviewer_item_ids": reviewer_item_ids,
|
|
2883
3253
|
"manuscript_targets": manuscript_targets,
|
|
2884
3254
|
}
|
|
3255
|
+
)
|
|
3256
|
+
inventory_entries.extend(
|
|
3257
|
+
[
|
|
3258
|
+
{
|
|
3259
|
+
"baseline_id": item.get("baseline_id"),
|
|
3260
|
+
"variant_id": item.get("variant_id"),
|
|
3261
|
+
"usage_scope": "supplementary",
|
|
3262
|
+
"status": "required",
|
|
3263
|
+
"reason": item.get("reason"),
|
|
3264
|
+
"benchmark": item.get("benchmark"),
|
|
3265
|
+
"split": item.get("split"),
|
|
3266
|
+
"baseline_root_rel_path": item.get("baseline_root_rel_path"),
|
|
3267
|
+
"storage_mode": item.get("storage_mode"),
|
|
3268
|
+
"origin": {
|
|
3269
|
+
"stage": "analysis_campaign",
|
|
3270
|
+
"campaign_id": campaign_id,
|
|
3271
|
+
"slice_id": slice_id,
|
|
3272
|
+
},
|
|
3273
|
+
}
|
|
3274
|
+
for item in required_baselines
|
|
3275
|
+
]
|
|
2885
3276
|
)
|
|
2886
3277
|
|
|
2887
3278
|
todo_manifest = {
|
|
@@ -2903,6 +3294,7 @@ class ArtifactService:
|
|
|
2903
3294
|
"why_now": item.get("why_now") or context.get("why_now"),
|
|
2904
3295
|
"success_criteria": item.get("success_criteria") or context.get("success_criteria"),
|
|
2905
3296
|
"abandonment_criteria": item.get("abandonment_criteria") or context.get("abandonment_criteria"),
|
|
3297
|
+
"required_baselines": item.get("required_baselines") or context.get("required_baselines") or [],
|
|
2906
3298
|
"reviewer_item_ids": item.get("reviewer_item_ids") or context.get("reviewer_item_ids") or [],
|
|
2907
3299
|
"manuscript_targets": item.get("manuscript_targets") or context.get("manuscript_targets") or [],
|
|
2908
3300
|
}
|
|
@@ -2957,6 +3349,7 @@ class ArtifactService:
|
|
|
2957
3349
|
f"- Research question: {item['research_question'] or 'TBD'}",
|
|
2958
3350
|
f"- Experimental design: {item['experimental_design'] or 'TBD'}",
|
|
2959
3351
|
f"- Why now: {item['why_now'] or 'TBD'}",
|
|
3352
|
+
f"- Required baselines: {', '.join(self._analysis_baseline_label(entry) for entry in item['required_baselines']) or 'none'}",
|
|
2960
3353
|
f"- Success criteria: {item['success_criteria'] or 'TBD'}",
|
|
2961
3354
|
f"- Abandonment criteria: {item['abandonment_criteria'] or 'TBD'}",
|
|
2962
3355
|
f"- Completion condition: {item['completion_condition'] or item['must_not_simplify'] or 'TBD'}",
|
|
@@ -3017,6 +3410,7 @@ class ArtifactService:
|
|
|
3017
3410
|
"why_now": item["why_now"],
|
|
3018
3411
|
"completion_condition": item["completion_condition"] or item["must_not_simplify"],
|
|
3019
3412
|
"must_not_simplify": item["must_not_simplify"],
|
|
3413
|
+
"required_baselines": item["required_baselines"],
|
|
3020
3414
|
"success_criteria": item["success_criteria"],
|
|
3021
3415
|
"abandonment_criteria": item["abandonment_criteria"],
|
|
3022
3416
|
"reviewer_item_ids": item["reviewer_item_ids"],
|
|
@@ -3065,6 +3459,7 @@ class ArtifactService:
|
|
|
3065
3459
|
"why_now": item["why_now"],
|
|
3066
3460
|
"completion_condition": item["completion_condition"] or item["must_not_simplify"],
|
|
3067
3461
|
"must_not_simplify": item["must_not_simplify"],
|
|
3462
|
+
"required_baselines": item["required_baselines"],
|
|
3068
3463
|
"success_criteria": item["success_criteria"],
|
|
3069
3464
|
"abandonment_criteria": item["abandonment_criteria"],
|
|
3070
3465
|
"reviewer_item_ids": item["reviewer_item_ids"],
|
|
@@ -3089,6 +3484,7 @@ class ArtifactService:
|
|
|
3089
3484
|
workspace_mode="analysis",
|
|
3090
3485
|
last_flow_type="analysis_campaign",
|
|
3091
3486
|
)
|
|
3487
|
+
baseline_inventory = self._upsert_analysis_baseline_inventory(quest_root, inventory_entries) if inventory_entries else None
|
|
3092
3488
|
self.quest_service.update_settings(self._quest_id(quest_root), active_anchor="analysis-campaign")
|
|
3093
3489
|
checkpoint_result = self._checkpoint_with_optional_push(
|
|
3094
3490
|
parent_worktree_root,
|
|
@@ -3138,6 +3534,7 @@ class ArtifactService:
|
|
|
3138
3534
|
"charter_path": str(charter_path),
|
|
3139
3535
|
"slices": slice_contexts,
|
|
3140
3536
|
"manifest": manifest,
|
|
3537
|
+
"analysis_baseline_inventory": baseline_inventory,
|
|
3141
3538
|
"todo_manifest_path": str(todo_manifest_path),
|
|
3142
3539
|
"artifact": artifact,
|
|
3143
3540
|
"checkpoint": checkpoint_result,
|
|
@@ -3320,6 +3717,18 @@ class ArtifactService:
|
|
|
3320
3717
|
raise ValueError("submit_paper_bundle requires a selected outline or explicit `outline_path`.")
|
|
3321
3718
|
|
|
3322
3719
|
manifest_path = self._paper_bundle_manifest_path(quest_root)
|
|
3720
|
+
baseline_inventory = self._write_paper_baseline_inventory(quest_root)
|
|
3721
|
+
baseline_inventory_path = self._paper_baseline_inventory_path(quest_root)
|
|
3722
|
+
source_branch = (
|
|
3723
|
+
str(self.quest_service.read_research_state(quest_root).get("current_workspace_branch") or "").strip()
|
|
3724
|
+
or current_branch(self._workspace_root_for(quest_root))
|
|
3725
|
+
)
|
|
3726
|
+
open_source_manifest = self._ensure_open_source_prep(
|
|
3727
|
+
quest_root,
|
|
3728
|
+
source_branch=source_branch,
|
|
3729
|
+
source_bundle_manifest_path="paper/paper_bundle_manifest.json",
|
|
3730
|
+
baseline_inventory_path="paper/baseline_inventory.json",
|
|
3731
|
+
)
|
|
3323
3732
|
manifest = {
|
|
3324
3733
|
"schema_version": 1,
|
|
3325
3734
|
"title": str(
|
|
@@ -3338,6 +3747,10 @@ class ArtifactService:
|
|
|
3338
3747
|
"compile_report_path": str(compile_report_path or "paper/build/compile_report.json").strip() or None,
|
|
3339
3748
|
"pdf_path": str(pdf_path or "").strip() or None,
|
|
3340
3749
|
"latex_root_path": str(latex_root_path or "").strip() or None,
|
|
3750
|
+
"baseline_inventory_path": "paper/baseline_inventory.json",
|
|
3751
|
+
"open_source_manifest_path": "release/open_source/manifest.json",
|
|
3752
|
+
"open_source_cleanup_plan_path": str(open_source_manifest.get("cleanup_plan_path") or "").strip()
|
|
3753
|
+
or "release/open_source/cleanup_plan.md",
|
|
3341
3754
|
"selected_outline_ref": str(selected_outline.get("outline_id") or "").strip() or None,
|
|
3342
3755
|
"created_at": utc_now(),
|
|
3343
3756
|
"updated_at": utc_now(),
|
|
@@ -3359,10 +3772,14 @@ class ArtifactService:
|
|
|
3359
3772
|
"outline_path": manifest.get("outline_path"),
|
|
3360
3773
|
"draft_path": manifest.get("draft_path"),
|
|
3361
3774
|
"pdf_path": manifest.get("pdf_path"),
|
|
3775
|
+
"baseline_inventory_path": str(baseline_inventory_path),
|
|
3776
|
+
"open_source_manifest_path": str(self._open_source_manifest_path(quest_root)),
|
|
3362
3777
|
},
|
|
3363
3778
|
"details": {
|
|
3364
3779
|
"title": manifest.get("title"),
|
|
3365
3780
|
"selected_outline_ref": manifest.get("selected_outline_ref"),
|
|
3781
|
+
"baseline_inventory_count": len(baseline_inventory.get("supplementary_baselines") or []),
|
|
3782
|
+
"open_source_status": open_source_manifest.get("status"),
|
|
3366
3783
|
},
|
|
3367
3784
|
},
|
|
3368
3785
|
checkpoint=False,
|
|
@@ -3372,6 +3789,8 @@ class ArtifactService:
|
|
|
3372
3789
|
"ok": True,
|
|
3373
3790
|
"manifest_path": str(manifest_path),
|
|
3374
3791
|
"manifest": manifest,
|
|
3792
|
+
"baseline_inventory_path": str(baseline_inventory_path),
|
|
3793
|
+
"open_source_manifest_path": str(self._open_source_manifest_path(quest_root)),
|
|
3375
3794
|
"artifact": artifact,
|
|
3376
3795
|
}
|
|
3377
3796
|
|
|
@@ -3394,6 +3813,8 @@ class ArtifactService:
|
|
|
3394
3813
|
next_recommendation: str | None = None,
|
|
3395
3814
|
dataset_scope: str = "full",
|
|
3396
3815
|
subset_approval_ref: str | None = None,
|
|
3816
|
+
comparison_baselines: list[dict[str, Any]] | None = None,
|
|
3817
|
+
evaluation_summary: dict[str, Any] | None = None,
|
|
3397
3818
|
) -> dict[str, Any]:
|
|
3398
3819
|
state = self.quest_service.read_research_state(quest_root)
|
|
3399
3820
|
manifest = self._read_analysis_manifest(quest_root, campaign_id)
|
|
@@ -3408,16 +3829,19 @@ class ArtifactService:
|
|
|
3408
3829
|
evidence_paths = [str(item).strip() for item in (evidence_paths or []) if str(item).strip()]
|
|
3409
3830
|
deviations = [str(item).strip() for item in (deviations or []) if str(item).strip()]
|
|
3410
3831
|
metric_rows = [item for item in (metric_rows or []) if isinstance(item, dict)]
|
|
3832
|
+
normalized_comparison_baselines = self._normalize_comparison_baselines(quest_root, comparison_baselines)
|
|
3411
3833
|
normalized_claim_impact = str(claim_impact or "").strip() or None
|
|
3412
3834
|
normalized_reviewer_resolution = str(reviewer_resolution or "").strip() or None
|
|
3413
3835
|
normalized_manuscript_update_hint = str(manuscript_update_hint or "").strip() or None
|
|
3414
3836
|
normalized_next_recommendation = str(next_recommendation or "").strip() or None
|
|
3837
|
+
normalized_evaluation_summary = self._normalize_evaluation_summary(evaluation_summary)
|
|
3415
3838
|
slice_worktree_root = Path(str(target.get("worktree_root") or ""))
|
|
3416
3839
|
parent_worktree_root = Path(str(manifest.get("parent_worktree_root") or ""))
|
|
3417
3840
|
parent_branch = str(manifest.get("parent_branch") or "")
|
|
3418
3841
|
|
|
3419
3842
|
result_dir = ensure_dir(slice_worktree_root / "experiments" / "analysis" / campaign_id / slice_id)
|
|
3420
3843
|
result_path = result_dir / "RESULT.md"
|
|
3844
|
+
result_json_path = result_dir / "RESULT.json"
|
|
3421
3845
|
result_lines = [
|
|
3422
3846
|
f"# {target.get('title') or slice_id}",
|
|
3423
3847
|
"",
|
|
@@ -3456,6 +3880,10 @@ class ArtifactService:
|
|
|
3456
3880
|
"",
|
|
3457
3881
|
normalized_next_recommendation or "Not recorded.",
|
|
3458
3882
|
"",
|
|
3883
|
+
"## Evaluation Summary",
|
|
3884
|
+
"",
|
|
3885
|
+
*self._evaluation_summary_markdown_lines(normalized_evaluation_summary),
|
|
3886
|
+
"",
|
|
3459
3887
|
"## Deviations",
|
|
3460
3888
|
"",
|
|
3461
3889
|
]
|
|
@@ -3472,6 +3900,20 @@ class ArtifactService:
|
|
|
3472
3900
|
result_lines.extend(["", "## Metric Rows", ""])
|
|
3473
3901
|
for row in metric_rows:
|
|
3474
3902
|
result_lines.append(f"- `{row}`")
|
|
3903
|
+
result_lines.extend(["", "## Comparison Baselines", ""])
|
|
3904
|
+
if normalized_comparison_baselines:
|
|
3905
|
+
for entry in normalized_comparison_baselines:
|
|
3906
|
+
result_lines.append(f"- {self._analysis_baseline_label(entry)}")
|
|
3907
|
+
if entry.get("baseline_root_rel_path"):
|
|
3908
|
+
result_lines.append(f" - Root: `{entry['baseline_root_rel_path']}`")
|
|
3909
|
+
if entry.get("metrics_summary"):
|
|
3910
|
+
result_lines.append(f" - Metrics: `{entry['metrics_summary']}`")
|
|
3911
|
+
if entry.get("published"):
|
|
3912
|
+
result_lines.append(
|
|
3913
|
+
f" - Published: `{entry.get('published_entry_id') or entry.get('baseline_id')}`"
|
|
3914
|
+
)
|
|
3915
|
+
else:
|
|
3916
|
+
result_lines.append("- None recorded.")
|
|
3475
3917
|
if subset_approval_ref:
|
|
3476
3918
|
result_lines.extend(["", "## Subset Approval", "", f"`{subset_approval_ref}`"])
|
|
3477
3919
|
write_text(result_path, "\n".join(result_lines).rstrip() + "\n")
|
|
@@ -3486,6 +3928,37 @@ class ArtifactService:
|
|
|
3486
3928
|
if len(keys) == 1:
|
|
3487
3929
|
metrics_summary[keys[0]] = row.get(keys[0])
|
|
3488
3930
|
|
|
3931
|
+
result_payload = {
|
|
3932
|
+
"schema_version": 1,
|
|
3933
|
+
"result_kind": "analysis_slice",
|
|
3934
|
+
"campaign_id": campaign_id,
|
|
3935
|
+
"slice_id": slice_id,
|
|
3936
|
+
"status": status,
|
|
3937
|
+
"title": target.get("title"),
|
|
3938
|
+
"goal": target.get("goal"),
|
|
3939
|
+
"run_kind": target.get("run_kind"),
|
|
3940
|
+
"required_baselines": target.get("required_baselines") or [],
|
|
3941
|
+
"comparison_baselines": normalized_comparison_baselines,
|
|
3942
|
+
"metrics_summary": metrics_summary,
|
|
3943
|
+
"metric_rows": metric_rows,
|
|
3944
|
+
"dataset_scope": normalized_scope,
|
|
3945
|
+
"subset_approval_ref": subset_approval_ref,
|
|
3946
|
+
"setup": setup.strip() or None,
|
|
3947
|
+
"execution": execution.strip() or None,
|
|
3948
|
+
"results": results.strip() or None,
|
|
3949
|
+
"claim_impact": normalized_claim_impact,
|
|
3950
|
+
"reviewer_resolution": normalized_reviewer_resolution,
|
|
3951
|
+
"manuscript_update_hint": normalized_manuscript_update_hint,
|
|
3952
|
+
"next_recommendation": normalized_next_recommendation,
|
|
3953
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
3954
|
+
"deviations": deviations,
|
|
3955
|
+
"evidence_paths": evidence_paths,
|
|
3956
|
+
"source_branch": str(target.get("branch") or ""),
|
|
3957
|
+
"source_worktree_root": str(slice_worktree_root),
|
|
3958
|
+
"updated_at": utc_now(),
|
|
3959
|
+
}
|
|
3960
|
+
write_json(result_json_path, result_payload)
|
|
3961
|
+
|
|
3489
3962
|
mirror_dir = ensure_dir(parent_worktree_root / "experiments" / "analysis-results" / campaign_id)
|
|
3490
3963
|
mirror_path = mirror_dir / f"{slice_id}.md"
|
|
3491
3964
|
mirror_lines = [
|
|
@@ -3524,7 +3997,17 @@ class ArtifactService:
|
|
|
3524
3997
|
"",
|
|
3525
3998
|
normalized_manuscript_update_hint or "Not recorded.",
|
|
3526
3999
|
"",
|
|
4000
|
+
"## Evaluation Summary",
|
|
4001
|
+
"",
|
|
4002
|
+
*self._evaluation_summary_markdown_lines(normalized_evaluation_summary),
|
|
4003
|
+
"",
|
|
3527
4004
|
]
|
|
4005
|
+
mirror_lines.extend(["## Comparison Baselines", ""])
|
|
4006
|
+
if normalized_comparison_baselines:
|
|
4007
|
+
mirror_lines.extend([f"- {self._analysis_baseline_label(entry)}" for entry in normalized_comparison_baselines])
|
|
4008
|
+
else:
|
|
4009
|
+
mirror_lines.append("- None recorded.")
|
|
4010
|
+
mirror_lines.append("")
|
|
3528
4011
|
write_text(mirror_path, "\n".join(mirror_lines).rstrip() + "\n")
|
|
3529
4012
|
|
|
3530
4013
|
artifact = self.record(
|
|
@@ -3548,6 +4031,7 @@ class ArtifactService:
|
|
|
3548
4031
|
"protocol_step": "record",
|
|
3549
4032
|
"paths": {
|
|
3550
4033
|
"slice_result_md": str(result_path),
|
|
4034
|
+
"slice_result_json": str(result_json_path),
|
|
3551
4035
|
"parent_result_md": str(mirror_path),
|
|
3552
4036
|
},
|
|
3553
4037
|
"details": {
|
|
@@ -3563,7 +4047,11 @@ class ArtifactService:
|
|
|
3563
4047
|
"next_recommendation": normalized_next_recommendation,
|
|
3564
4048
|
"deviations": deviations,
|
|
3565
4049
|
"evidence_paths": evidence_paths,
|
|
4050
|
+
"required_baselines": target.get("required_baselines") or [],
|
|
4051
|
+
"comparison_baselines": normalized_comparison_baselines,
|
|
4052
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
3566
4053
|
},
|
|
4054
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
3567
4055
|
},
|
|
3568
4056
|
checkpoint=False,
|
|
3569
4057
|
workspace_root=slice_worktree_root,
|
|
@@ -3586,11 +4074,14 @@ class ArtifactService:
|
|
|
3586
4074
|
updated["status"] = status
|
|
3587
4075
|
updated["completed_at"] = utc_now()
|
|
3588
4076
|
updated["result_path"] = str(result_path)
|
|
4077
|
+
updated["result_json_path"] = str(result_json_path)
|
|
3589
4078
|
updated["mirror_path"] = str(mirror_path)
|
|
3590
4079
|
updated["claim_impact"] = normalized_claim_impact
|
|
3591
4080
|
updated["reviewer_resolution"] = normalized_reviewer_resolution
|
|
3592
4081
|
updated["manuscript_update_hint"] = normalized_manuscript_update_hint
|
|
3593
4082
|
updated["next_recommendation"] = normalized_next_recommendation
|
|
4083
|
+
updated["comparison_baselines"] = normalized_comparison_baselines
|
|
4084
|
+
updated["evaluation_summary"] = normalized_evaluation_summary
|
|
3594
4085
|
updated_slices.append(updated)
|
|
3595
4086
|
next_slice = next((item for item in updated_slices if str(item.get("status") or "") == "pending"), None)
|
|
3596
4087
|
manifest = self._write_analysis_manifest(
|
|
@@ -3601,6 +4092,36 @@ class ArtifactService:
|
|
|
3601
4092
|
"slices": updated_slices,
|
|
3602
4093
|
},
|
|
3603
4094
|
)
|
|
4095
|
+
baseline_inventory = (
|
|
4096
|
+
self._upsert_analysis_baseline_inventory(
|
|
4097
|
+
quest_root,
|
|
4098
|
+
[
|
|
4099
|
+
{
|
|
4100
|
+
"baseline_id": entry.get("baseline_id"),
|
|
4101
|
+
"variant_id": entry.get("variant_id"),
|
|
4102
|
+
"usage_scope": "supplementary",
|
|
4103
|
+
"status": "registered",
|
|
4104
|
+
"reason": entry.get("reason"),
|
|
4105
|
+
"benchmark": entry.get("benchmark"),
|
|
4106
|
+
"split": entry.get("split"),
|
|
4107
|
+
"baseline_root_rel_path": entry.get("baseline_root_rel_path"),
|
|
4108
|
+
"storage_mode": entry.get("storage_mode"),
|
|
4109
|
+
"metrics_summary": entry.get("metrics_summary"),
|
|
4110
|
+
"evidence_paths": entry.get("evidence_paths"),
|
|
4111
|
+
"published": entry.get("published"),
|
|
4112
|
+
"published_entry_id": entry.get("published_entry_id"),
|
|
4113
|
+
"origin": {
|
|
4114
|
+
"stage": "analysis_campaign",
|
|
4115
|
+
"campaign_id": campaign_id,
|
|
4116
|
+
"slice_id": slice_id,
|
|
4117
|
+
},
|
|
4118
|
+
}
|
|
4119
|
+
for entry in normalized_comparison_baselines
|
|
4120
|
+
],
|
|
4121
|
+
)
|
|
4122
|
+
if normalized_comparison_baselines
|
|
4123
|
+
else self._read_analysis_baseline_inventory(quest_root)
|
|
4124
|
+
)
|
|
3604
4125
|
|
|
3605
4126
|
if next_slice is not None:
|
|
3606
4127
|
research_state = self.quest_service.update_research_state(
|
|
@@ -3642,14 +4163,17 @@ class ArtifactService:
|
|
|
3642
4163
|
"slice_id": slice_id,
|
|
3643
4164
|
"status": status,
|
|
3644
4165
|
"result_path": str(result_path),
|
|
4166
|
+
"result_json_path": str(result_json_path),
|
|
3645
4167
|
"mirror_path": str(mirror_path),
|
|
3646
4168
|
"artifact": artifact,
|
|
3647
4169
|
"slice_checkpoint": slice_checkpoint,
|
|
3648
4170
|
"parent_checkpoint": parent_checkpoint,
|
|
3649
4171
|
"next_slice": next_slice,
|
|
3650
4172
|
"manifest": manifest,
|
|
4173
|
+
"analysis_baseline_inventory": baseline_inventory,
|
|
3651
4174
|
"interaction": interaction,
|
|
3652
4175
|
"research_state": research_state,
|
|
4176
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
3653
4177
|
"completed": False,
|
|
3654
4178
|
}
|
|
3655
4179
|
|
|
@@ -3744,6 +4268,7 @@ class ArtifactService:
|
|
|
3744
4268
|
"slice_id": slice_id,
|
|
3745
4269
|
"status": status,
|
|
3746
4270
|
"result_path": str(result_path),
|
|
4271
|
+
"result_json_path": str(result_json_path),
|
|
3747
4272
|
"mirror_path": str(mirror_path),
|
|
3748
4273
|
"artifact": artifact,
|
|
3749
4274
|
"slice_checkpoint": slice_checkpoint,
|
|
@@ -3752,8 +4277,10 @@ class ArtifactService:
|
|
|
3752
4277
|
"summary_checkpoint": parent_summary_checkpoint,
|
|
3753
4278
|
"summary_path": str(summary_path),
|
|
3754
4279
|
"manifest": manifest,
|
|
4280
|
+
"analysis_baseline_inventory": baseline_inventory,
|
|
3755
4281
|
"interaction": interaction,
|
|
3756
4282
|
"research_state": research_state,
|
|
4283
|
+
"evaluation_summary": normalized_evaluation_summary,
|
|
3757
4284
|
"completed": True,
|
|
3758
4285
|
"returned_to_branch": parent_branch,
|
|
3759
4286
|
"returned_to_worktree_root": str(parent_worktree_root),
|
|
@@ -4660,7 +5187,7 @@ class ArtifactService:
|
|
|
4660
5187
|
return targets
|
|
4661
5188
|
|
|
4662
5189
|
def _connectors_config(self) -> dict[str, Any]:
|
|
4663
|
-
return ConfigManager(self.home).
|
|
5190
|
+
return ConfigManager(self.home).load_named_normalized("connectors")
|
|
4664
5191
|
|
|
4665
5192
|
@staticmethod
|
|
4666
5193
|
def _delivery_policy(connectors: dict[str, Any]) -> str:
|