@researai/deepscientist 1.5.1 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +69 -1
  2. package/bin/ds.js +2239 -153
  3. package/docs/en/00_QUICK_START.md +60 -20
  4. package/docs/en/01_SETTINGS_REFERENCE.md +20 -20
  5. package/docs/en/02_START_RESEARCH_GUIDE.md +11 -11
  6. package/docs/en/03_QQ_CONNECTOR_GUIDE.md +10 -10
  7. package/docs/en/05_TUI_GUIDE.md +1 -1
  8. package/docs/en/09_DOCTOR.md +48 -4
  9. package/docs/en/90_ARCHITECTURE.md +4 -2
  10. package/docs/zh/00_QUICK_START.md +60 -20
  11. package/docs/zh/01_SETTINGS_REFERENCE.md +21 -21
  12. package/docs/zh/02_START_RESEARCH_GUIDE.md +19 -19
  13. package/docs/zh/03_QQ_CONNECTOR_GUIDE.md +10 -10
  14. package/docs/zh/05_TUI_GUIDE.md +1 -1
  15. package/docs/zh/09_DOCTOR.md +46 -4
  16. package/install.sh +125 -8
  17. package/package.json +2 -1
  18. package/pyproject.toml +1 -1
  19. package/src/deepscientist/__init__.py +6 -1
  20. package/src/deepscientist/artifact/service.py +553 -26
  21. package/src/deepscientist/bash_exec/monitor.py +23 -4
  22. package/src/deepscientist/bash_exec/runtime.py +3 -0
  23. package/src/deepscientist/bash_exec/service.py +132 -4
  24. package/src/deepscientist/bridges/base.py +10 -19
  25. package/src/deepscientist/channels/discord_gateway.py +25 -2
  26. package/src/deepscientist/channels/feishu_long_connection.py +41 -3
  27. package/src/deepscientist/channels/qq.py +524 -64
  28. package/src/deepscientist/channels/qq_gateway.py +22 -3
  29. package/src/deepscientist/channels/relay.py +429 -90
  30. package/src/deepscientist/channels/slack_socket.py +29 -5
  31. package/src/deepscientist/channels/telegram_polling.py +25 -2
  32. package/src/deepscientist/channels/whatsapp_local_session.py +32 -4
  33. package/src/deepscientist/cli.py +27 -0
  34. package/src/deepscientist/config/models.py +6 -40
  35. package/src/deepscientist/config/service.py +165 -156
  36. package/src/deepscientist/connector_profiles.py +346 -0
  37. package/src/deepscientist/connector_runtime.py +88 -43
  38. package/src/deepscientist/daemon/api/handlers.py +65 -11
  39. package/src/deepscientist/daemon/api/router.py +4 -2
  40. package/src/deepscientist/daemon/app.py +772 -219
  41. package/src/deepscientist/doctor.py +69 -2
  42. package/src/deepscientist/gitops/diff.py +3 -0
  43. package/src/deepscientist/home.py +25 -2
  44. package/src/deepscientist/mcp/context.py +3 -1
  45. package/src/deepscientist/mcp/server.py +66 -7
  46. package/src/deepscientist/migration.py +114 -0
  47. package/src/deepscientist/prompts/builder.py +71 -3
  48. package/src/deepscientist/qq_profiles.py +186 -0
  49. package/src/deepscientist/quest/layout.py +1 -0
  50. package/src/deepscientist/quest/service.py +70 -12
  51. package/src/deepscientist/quest/stage_views.py +46 -0
  52. package/src/deepscientist/runners/codex.py +2 -0
  53. package/src/deepscientist/shared.py +44 -17
  54. package/src/prompts/connectors/lingzhu.md +3 -0
  55. package/src/prompts/connectors/qq.md +42 -2
  56. package/src/prompts/system.md +123 -10
  57. package/src/skills/analysis-campaign/SKILL.md +35 -6
  58. package/src/skills/baseline/SKILL.md +73 -32
  59. package/src/skills/decision/SKILL.md +4 -3
  60. package/src/skills/experiment/SKILL.md +28 -6
  61. package/src/skills/finalize/SKILL.md +5 -2
  62. package/src/skills/idea/SKILL.md +2 -2
  63. package/src/skills/intake-audit/SKILL.md +2 -2
  64. package/src/skills/rebuttal/SKILL.md +4 -2
  65. package/src/skills/review/SKILL.md +4 -2
  66. package/src/skills/scout/SKILL.md +2 -2
  67. package/src/skills/write/SKILL.md +2 -2
  68. package/src/tui/package.json +1 -1
  69. package/src/ui/dist/assets/{AiManusChatView-w5lF2Ttt.js → AiManusChatView-qzChi9uh.js} +67 -94
  70. package/src/ui/dist/assets/{AnalysisPlugin-DJOED79I.js → AnalysisPlugin-CcC_-UqN.js} +1 -1
  71. package/src/ui/dist/assets/{AutoFigurePlugin-DaG61Y0M.js → AutoFigurePlugin-DD8LkJLe.js} +5 -5
  72. package/src/ui/dist/assets/{CliPlugin-CV4LqUB_.js → CliPlugin-DJJFfVmW.js} +17 -110
  73. package/src/ui/dist/assets/{CodeEditorPlugin-DylfAea4.js → CodeEditorPlugin-CrjkHNLh.js} +8 -8
  74. package/src/ui/dist/assets/{CodeViewerPlugin-F7saY0LM.js → CodeViewerPlugin-obnD6G5R.js} +5 -5
  75. package/src/ui/dist/assets/{DocViewerPlugin-COP0c7jf.js → DocViewerPlugin-DB9SUQVd.js} +3 -3
  76. package/src/ui/dist/assets/{GitDiffViewerPlugin-CAS05pT9.js → GitDiffViewerPlugin-DZLlNlD2.js} +1 -1
  77. package/src/ui/dist/assets/{ImageViewerPlugin-Bco1CN_w.js → ImageViewerPlugin-BGwfDZ0Y.js} +5 -5
  78. package/src/ui/dist/assets/{LabCopilotPanel-CvMlCD99.js → LabCopilotPanel-dfLptQcR.js} +10 -10
  79. package/src/ui/dist/assets/{LabPlugin-BYankkE4.js → LabPlugin-CeGjAl3A.js} +1 -1
  80. package/src/ui/dist/assets/{LatexPlugin-LDSMR-t-.js → LatexPlugin-BBJ7kd1V.js} +7 -7
  81. package/src/ui/dist/assets/{MarkdownViewerPlugin-B7o80jgm.js → MarkdownViewerPlugin-DKZi7BcB.js} +4 -4
  82. package/src/ui/dist/assets/{MarketplacePlugin-CM6ZOcpC.js → MarketplacePlugin-C_k-9jD0.js} +3 -3
  83. package/src/ui/dist/assets/{NotebookEditor-Dc61cXmK.js → NotebookEditor-4R88_BMO.js} +1 -1
  84. package/src/ui/dist/assets/{PdfLoader-DWowuQwx.js → PdfLoader-DwEFQLrw.js} +1 -1
  85. package/src/ui/dist/assets/{PdfMarkdownPlugin-BsJM1q_a.js → PdfMarkdownPlugin-D-jdsqF8.js} +3 -3
  86. package/src/ui/dist/assets/{PdfViewerPlugin-DB2eEEFQ.js → PdfViewerPlugin-CmeBGDY0.js} +10 -10
  87. package/src/ui/dist/assets/{SearchPlugin-CraThSvt.js → SearchPlugin-Dlz2WKJ4.js} +1 -1
  88. package/src/ui/dist/assets/{Stepper-CgocRTPq.js → Stepper-ClOgzWM3.js} +1 -1
  89. package/src/ui/dist/assets/{TextViewerPlugin-B1JGhKtd.js → TextViewerPlugin-DDQWxibk.js} +4 -4
  90. package/src/ui/dist/assets/{VNCViewer-CclFC7FM.js → VNCViewer-CJXT0Nm8.js} +9 -9
  91. package/src/ui/dist/assets/{bibtex-D3IKsMl7.js → bibtex-DLr4Rtk4.js} +1 -1
  92. package/src/ui/dist/assets/{code-BP37Xx0p.js → code-DgKK408Y.js} +1 -1
  93. package/src/ui/dist/assets/{file-content-BAJSu-9r.js → file-content-6HBqQnvQ.js} +1 -1
  94. package/src/ui/dist/assets/{file-diff-panel-DUGeCTuy.js → file-diff-panel-Dhu0TbBM.js} +1 -1
  95. package/src/ui/dist/assets/{file-socket-CXc1Ojf7.js → file-socket-CP3iwVZG.js} +1 -1
  96. package/src/ui/dist/assets/{file-utils-2J21jt7M.js → file-utils-BsS-Aw68.js} +1 -1
  97. package/src/ui/dist/assets/{image-CMMmgvcn.js → image-ByeK-Zcv.js} +1 -1
  98. package/src/ui/dist/assets/{index-DmwmJmbW.js → index-BLjo5--a.js} +33610 -31016
  99. package/src/ui/dist/assets/{index-CWgMgpow.js → index-BdsE0uRz.js} +11 -11
  100. package/src/ui/dist/assets/{index-s7aHnNQ4.js → index-C-eX-N6A.js} +1 -1
  101. package/src/ui/dist/assets/{index-KGt-z-dD.css → index-CuQhlrR-.css} +2747 -2
  102. package/src/ui/dist/assets/{index-BaVumsQT.js → index-DyremSIv.js} +2 -2
  103. package/src/ui/dist/assets/{message-square-CQRfX0Am.js → message-square-DnagiLnc.js} +1 -1
  104. package/src/ui/dist/assets/{monaco-B4TbdsrF.js → monaco-4kBFeprs.js} +1 -1
  105. package/src/ui/dist/assets/{popover-B8Rokodk.js → popover-hRCXZzs2.js} +1 -1
  106. package/src/ui/dist/assets/{project-sync-D_i96KH4.js → project-sync-O_85YuP6.js} +1 -1
  107. package/src/ui/dist/assets/{sigma-D12PnzCN.js → sigma-DvKopSnL.js} +1 -1
  108. package/src/ui/dist/assets/{tooltip-B6YrI4aJ.js → tooltip-BmlPc6kc.js} +1 -1
  109. package/src/ui/dist/assets/{trash-Bc8jGp0V.js → trash-n-UvdZFR.js} +1 -1
  110. package/src/ui/dist/assets/{useCliAccess-mXVCYSZ-.js → useCliAccess-WDd3_wIh.js} +1 -1
  111. package/src/ui/dist/assets/{useFileDiffOverlay-Bg6b9H9K.js → useFileDiffOverlay-rXLIL2NF.js} +1 -1
  112. package/src/ui/dist/assets/{wrap-text-Drh5GEnL.js → wrap-text-qIYQ4a_W.js} +1 -1
  113. package/src/ui/dist/assets/{zoom-out-CJj9DZLn.js → zoom-out-fZXCEFsy.js} +1 -1
  114. package/src/ui/dist/index.html +2 -2
  115. package/uv.lock +1155 -0
  116. package/src/ui/dist/assets/LabPlugin-D9jVIo0A.css +0 -2698
@@ -91,6 +91,41 @@ class ArtifactService:
91
91
  self.baselines = BaselineRegistry(home)
92
92
  self.quest_service = QuestService(home)
93
93
 
94
+ def _normalize_evaluation_summary(self, payload: dict[str, Any] | None) -> dict[str, str] | None:
95
+ if not isinstance(payload, dict):
96
+ return None
97
+ normalized: dict[str, str] = {}
98
+ for key in (
99
+ "takeaway",
100
+ "claim_update",
101
+ "baseline_relation",
102
+ "comparability",
103
+ "failure_mode",
104
+ "next_action",
105
+ ):
106
+ value = payload.get(key)
107
+ if value is None:
108
+ continue
109
+ text = str(value).strip()
110
+ if text:
111
+ normalized[key] = text
112
+ return normalized or None
113
+
114
+ def _evaluation_summary_markdown_lines(self, payload: dict[str, Any] | None) -> list[str]:
115
+ normalized = self._normalize_evaluation_summary(payload)
116
+ if not normalized:
117
+ return ["- Not recorded."]
118
+ labels = (
119
+ ("takeaway", "Takeaway"),
120
+ ("claim_update", "Claim Update"),
121
+ ("baseline_relation", "Baseline Relation"),
122
+ ("comparability", "Comparability"),
123
+ ("failure_mode", "Failure Mode"),
124
+ ("next_action", "Next Action"),
125
+ )
126
+ lines = [f"- {label}: {normalized[key]}" for key, label in labels if normalized.get(key)]
127
+ return lines or ["- Not recorded."]
128
+
94
129
  def _workspace_root_for(self, quest_root: Path, workspace_root: Path | None = None) -> Path:
95
130
  if workspace_root is not None:
96
131
  return workspace_root
@@ -387,6 +422,207 @@ class ArtifactService:
387
422
  write_json(path, normalized)
388
423
  return normalized
389
424
 
425
+ def _analysis_baseline_inventory_path(self, quest_root: Path) -> Path:
426
+ return ensure_dir(quest_root / "artifacts" / "baselines") / "analysis_inventory.json"
427
+
428
+ def _read_analysis_baseline_inventory(self, quest_root: Path) -> dict[str, Any]:
429
+ path = self._analysis_baseline_inventory_path(quest_root)
430
+ payload = read_json(path, {})
431
+ if not isinstance(payload, dict):
432
+ payload = {}
433
+ entries = payload.get("entries") if isinstance(payload.get("entries"), list) else []
434
+ return {
435
+ "schema_version": 1,
436
+ "entries": [dict(item) for item in entries if isinstance(item, dict)],
437
+ "updated_at": payload.get("updated_at"),
438
+ }
439
+
440
+ def _write_analysis_baseline_inventory(self, quest_root: Path, payload: dict[str, Any]) -> dict[str, Any]:
441
+ path = self._analysis_baseline_inventory_path(quest_root)
442
+ normalized_entries = payload.get("entries") if isinstance(payload.get("entries"), list) else []
443
+ normalized = {
444
+ "schema_version": 1,
445
+ "entries": [dict(item) for item in normalized_entries if isinstance(item, dict)],
446
+ "updated_at": utc_now(),
447
+ }
448
+ write_json(path, normalized)
449
+ return normalized
450
+
451
+ def _normalize_baseline_root_rel_path(
452
+ self,
453
+ quest_root: Path,
454
+ baseline_root_rel_path: str | None,
455
+ *,
456
+ baseline_id: str | None = None,
457
+ ) -> tuple[str | None, str | None]:
458
+ raw = str(baseline_root_rel_path or "").strip()
459
+ if not raw:
460
+ return None, None
461
+ candidate = Path(raw)
462
+ resolved = candidate.resolve() if candidate.is_absolute() else resolve_within(quest_root, raw)
463
+ if not resolved.exists():
464
+ raise FileNotFoundError(f"Baseline root does not exist: {resolved}")
465
+ try:
466
+ relative = resolved.relative_to(quest_root.resolve()).as_posix()
467
+ except ValueError as exc:
468
+ raise ValueError("`baseline_root_rel_path` must stay within quest_root.") from exc
469
+ parts = Path(relative).parts
470
+ if len(parts) < 3 or parts[0] != "baselines" or parts[1] not in {"local", "imported"}:
471
+ raise ValueError(
472
+ "`baseline_root_rel_path` must live under `baselines/local/<baseline_id>/...` or "
473
+ "`baselines/imported/<baseline_id>/...`."
474
+ )
475
+ normalized_baseline_id = str(baseline_id or parts[2]).strip() or None
476
+ if normalized_baseline_id and parts[2] != normalized_baseline_id:
477
+ raise ValueError(
478
+ f"`baseline_root_rel_path` points to baseline `{parts[2]}`, which does not match `{normalized_baseline_id}`."
479
+ )
480
+ return relative, parts[1]
481
+
482
+ @staticmethod
483
+ def _analysis_baseline_label(payload: dict[str, Any]) -> str:
484
+ baseline_id = str(payload.get("baseline_id") or "baseline").strip() or "baseline"
485
+ parts = [f"`{baseline_id}`"]
486
+ variant_id = str(payload.get("variant_id") or "").strip()
487
+ if variant_id:
488
+ parts.append(f"variant `{variant_id}`")
489
+ benchmark = str(payload.get("benchmark") or "").strip()
490
+ split = str(payload.get("split") or "").strip()
491
+ if benchmark and split:
492
+ parts.append(f"benchmark `{benchmark}` / split `{split}`")
493
+ elif benchmark:
494
+ parts.append(f"benchmark `{benchmark}`")
495
+ elif split:
496
+ parts.append(f"split `{split}`")
497
+ reason = str(payload.get("reason") or "").strip()
498
+ if reason:
499
+ parts.append(f"reason: {reason}")
500
+ return " · ".join(parts)
501
+
502
+ def _normalize_required_baselines(self, quest_root: Path, values: list[object] | None) -> list[dict[str, Any]]:
503
+ normalized: list[dict[str, Any]] = []
504
+ for raw in values or []:
505
+ if not isinstance(raw, dict):
506
+ continue
507
+ baseline_id = str(raw.get("baseline_id") or "").strip()
508
+ if not baseline_id:
509
+ continue
510
+ baseline_root_rel_path, storage_mode = self._normalize_baseline_root_rel_path(
511
+ quest_root,
512
+ raw.get("baseline_root_rel_path"),
513
+ baseline_id=baseline_id,
514
+ )
515
+ normalized.append(
516
+ {
517
+ "baseline_id": baseline_id,
518
+ "variant_id": str(raw.get("variant_id") or "").strip() or None,
519
+ "reason": str(raw.get("reason") or "").strip() or None,
520
+ "benchmark": str(raw.get("benchmark") or "").strip() or None,
521
+ "split": str(raw.get("split") or "").strip() or None,
522
+ "baseline_root_rel_path": baseline_root_rel_path,
523
+ "storage_mode": storage_mode or (str(raw.get("storage_mode") or "").strip() or None),
524
+ "usage_scope": "supplementary",
525
+ }
526
+ )
527
+ return normalized
528
+
529
+ def _normalize_comparison_baselines(self, quest_root: Path, values: list[object] | None) -> list[dict[str, Any]]:
530
+ normalized: list[dict[str, Any]] = []
531
+ for raw in values or []:
532
+ if not isinstance(raw, dict):
533
+ continue
534
+ baseline_id = str(raw.get("baseline_id") or "").strip()
535
+ if not baseline_id:
536
+ continue
537
+ baseline_root_rel_path, storage_mode = self._normalize_baseline_root_rel_path(
538
+ quest_root,
539
+ raw.get("baseline_root_rel_path"),
540
+ baseline_id=baseline_id,
541
+ )
542
+ metrics_summary = (
543
+ normalize_metrics_summary(raw.get("metrics_summary"))
544
+ if isinstance(raw.get("metrics_summary"), dict)
545
+ else {}
546
+ )
547
+ normalized.append(
548
+ {
549
+ "baseline_id": baseline_id,
550
+ "variant_id": str(raw.get("variant_id") or "").strip() or None,
551
+ "benchmark": str(raw.get("benchmark") or "").strip() or None,
552
+ "split": str(raw.get("split") or "").strip() or None,
553
+ "reason": str(raw.get("reason") or "").strip() or None,
554
+ "metrics_summary": metrics_summary,
555
+ "evidence_paths": [
556
+ str(item).strip() for item in (raw.get("evidence_paths") or []) if str(item).strip()
557
+ ],
558
+ "baseline_root_rel_path": baseline_root_rel_path,
559
+ "storage_mode": storage_mode or (str(raw.get("storage_mode") or "").strip() or None),
560
+ "usage_scope": "supplementary",
561
+ "published": bool(raw.get("published", False)),
562
+ "published_entry_id": str(raw.get("published_entry_id") or "").strip() or None,
563
+ "status": str(raw.get("status") or "registered").strip() or "registered",
564
+ }
565
+ )
566
+ return normalized
567
+
568
+ @staticmethod
569
+ def _analysis_inventory_entry_key(payload: dict[str, Any]) -> tuple[str, str, str, str, str, str]:
570
+ origin = dict(payload.get("origin") or {}) if isinstance(payload.get("origin"), dict) else {}
571
+ return (
572
+ str(payload.get("baseline_id") or "").strip(),
573
+ str(payload.get("variant_id") or "").strip(),
574
+ str(origin.get("campaign_id") or "").strip(),
575
+ str(origin.get("slice_id") or "").strip(),
576
+ str(payload.get("benchmark") or "").strip(),
577
+ str(payload.get("split") or "").strip(),
578
+ )
579
+
580
+ @staticmethod
581
+ def _merge_analysis_inventory_entry(existing: dict[str, Any], incoming: dict[str, Any]) -> dict[str, Any]:
582
+ merged = dict(existing)
583
+ for key, value in incoming.items():
584
+ if value is None:
585
+ continue
586
+ if isinstance(value, str) and not value.strip():
587
+ continue
588
+ if isinstance(value, (list, dict)) and not value:
589
+ continue
590
+ merged[key] = value
591
+ merged["updated_at"] = utc_now()
592
+ merged.setdefault("created_at", existing.get("created_at") or incoming.get("created_at") or utc_now())
593
+ return merged
594
+
595
+ def _upsert_analysis_baseline_inventory(self, quest_root: Path, entries: list[dict[str, Any]]) -> dict[str, Any]:
596
+ inventory = self._read_analysis_baseline_inventory(quest_root)
597
+ existing_entries = [dict(item) for item in (inventory.get("entries") or []) if isinstance(item, dict)]
598
+ by_key = {
599
+ self._analysis_inventory_entry_key(item): dict(item)
600
+ for item in existing_entries
601
+ if str(item.get("baseline_id") or "").strip()
602
+ }
603
+ for raw in entries:
604
+ if not isinstance(raw, dict):
605
+ continue
606
+ entry = dict(raw)
607
+ if not str(entry.get("baseline_id") or "").strip():
608
+ continue
609
+ key = self._analysis_inventory_entry_key(entry)
610
+ current = by_key.get(key)
611
+ if current is None:
612
+ stamped = dict(entry)
613
+ stamped.setdefault("created_at", utc_now())
614
+ stamped["updated_at"] = utc_now()
615
+ by_key[key] = stamped
616
+ continue
617
+ by_key[key] = self._merge_analysis_inventory_entry(current, entry)
618
+ normalized = self._write_analysis_baseline_inventory(
619
+ quest_root,
620
+ {
621
+ "entries": list(by_key.values()),
622
+ },
623
+ )
624
+ return normalized
625
+
390
626
  def _paper_root(self, quest_root: Path) -> Path:
391
627
  return ensure_dir(quest_root / "paper")
392
628
 
@@ -405,6 +641,114 @@ class ArtifactService:
405
641
  def _paper_bundle_manifest_path(self, quest_root: Path) -> Path:
406
642
  return self._paper_root(quest_root) / "paper_bundle_manifest.json"
407
643
 
644
+ def _paper_baseline_inventory_path(self, quest_root: Path) -> Path:
645
+ return self._paper_root(quest_root) / "baseline_inventory.json"
646
+
647
+ def _open_source_root(self, quest_root: Path) -> Path:
648
+ return ensure_dir(quest_root / "release" / "open_source")
649
+
650
+ def _open_source_manifest_path(self, quest_root: Path) -> Path:
651
+ return self._open_source_root(quest_root) / "manifest.json"
652
+
653
+ def _open_source_cleanup_plan_path(self, quest_root: Path) -> Path:
654
+ return self._open_source_root(quest_root) / "cleanup_plan.md"
655
+
656
+ def _open_source_include_paths_path(self, quest_root: Path) -> Path:
657
+ return self._open_source_root(quest_root) / "include_paths.json"
658
+
659
+ def _open_source_exclude_paths_path(self, quest_root: Path) -> Path:
660
+ return self._open_source_root(quest_root) / "exclude_paths.json"
661
+
662
+ def _write_paper_baseline_inventory(self, quest_root: Path) -> dict[str, Any]:
663
+ quest_yaml = self.quest_service.read_quest_yaml(quest_root)
664
+ confirmed_baseline_ref = (
665
+ dict(quest_yaml.get("confirmed_baseline_ref") or {})
666
+ if isinstance(quest_yaml.get("confirmed_baseline_ref"), dict)
667
+ else None
668
+ )
669
+ analysis_inventory = self._read_analysis_baseline_inventory(quest_root)
670
+ payload = {
671
+ "schema_version": 1,
672
+ "canonical_baseline_ref": confirmed_baseline_ref,
673
+ "supplementary_baselines": [
674
+ dict(item) for item in (analysis_inventory.get("entries") or []) if isinstance(item, dict)
675
+ ],
676
+ "updated_at": utc_now(),
677
+ }
678
+ write_json(self._paper_baseline_inventory_path(quest_root), payload)
679
+ return payload
680
+
681
+ def _ensure_open_source_prep(
682
+ self,
683
+ quest_root: Path,
684
+ *,
685
+ source_branch: str | None,
686
+ source_bundle_manifest_path: str,
687
+ baseline_inventory_path: str,
688
+ ) -> dict[str, Any]:
689
+ root = self._open_source_root(quest_root)
690
+ cleanup_plan_path = self._open_source_cleanup_plan_path(quest_root)
691
+ include_paths_path = self._open_source_include_paths_path(quest_root)
692
+ exclude_paths_path = self._open_source_exclude_paths_path(quest_root)
693
+ manifest_path = self._open_source_manifest_path(quest_root)
694
+ if not cleanup_plan_path.exists():
695
+ write_text(
696
+ cleanup_plan_path,
697
+ "\n".join(
698
+ [
699
+ "# Open Source Cleanup Plan",
700
+ "",
701
+ "## Goal",
702
+ "",
703
+ "Prepare a clean public code branch from the finalized paper line.",
704
+ "",
705
+ "## Keep",
706
+ "",
707
+ "- Core training / evaluation code needed to reproduce the public results.",
708
+ "",
709
+ "## Remove Or Private",
710
+ "",
711
+ "- Temporary logs, scratch files, local secrets, and unrelated experimental debris.",
712
+ "",
713
+ "## Before Release",
714
+ "",
715
+ "- Confirm README, license, and benchmark instructions are complete.",
716
+ "- Confirm only necessary files remain in scope.",
717
+ "",
718
+ ]
719
+ ).rstrip()
720
+ + "\n",
721
+ )
722
+ if not include_paths_path.exists():
723
+ write_json(include_paths_path, {"paths": []})
724
+ if not exclude_paths_path.exists():
725
+ write_json(exclude_paths_path, {"paths": []})
726
+ existing = read_json(manifest_path, {})
727
+ existing = existing if isinstance(existing, dict) else {}
728
+ manifest = {
729
+ **existing,
730
+ "schema_version": 1,
731
+ "status": str(existing.get("status") or "draft").strip() or "draft",
732
+ "source_branch": str(existing.get("source_branch") or source_branch or "").strip() or None,
733
+ "release_branch": str(existing.get("release_branch") or "").strip() or None,
734
+ "source_bundle_manifest_path": str(
735
+ existing.get("source_bundle_manifest_path") or source_bundle_manifest_path or ""
736
+ ).strip()
737
+ or source_bundle_manifest_path,
738
+ "baseline_inventory_path": str(existing.get("baseline_inventory_path") or baseline_inventory_path or "").strip()
739
+ or baseline_inventory_path,
740
+ "cleanup_plan_path": str(existing.get("cleanup_plan_path") or "release/open_source/cleanup_plan.md").strip()
741
+ or "release/open_source/cleanup_plan.md",
742
+ "include_paths_path": str(existing.get("include_paths_path") or "release/open_source/include_paths.json").strip()
743
+ or "release/open_source/include_paths.json",
744
+ "exclude_paths_path": str(existing.get("exclude_paths_path") or "release/open_source/exclude_paths.json").strip()
745
+ or "release/open_source/exclude_paths.json",
746
+ "created_at": existing.get("created_at") or utc_now(),
747
+ "updated_at": utc_now(),
748
+ }
749
+ write_json(manifest_path, manifest)
750
+ return manifest
751
+
408
752
  def _next_paper_outline_id(self, quest_root: Path) -> str:
409
753
  max_index = 0
410
754
  for root in (self._paper_outline_candidates_root(quest_root), self._paper_outline_revisions_root(quest_root)):
@@ -2367,6 +2711,7 @@ class ArtifactService:
2367
2711
  status: str = "completed",
2368
2712
  baseline_id: str | None = None,
2369
2713
  baseline_variant_id: str | None = None,
2714
+ evaluation_summary: dict[str, Any] | None = None,
2370
2715
  ) -> dict[str, Any]:
2371
2716
  self._require_baseline_gate_open(quest_root, action="record_main_experiment")
2372
2717
  state = self.quest_service.read_research_state(quest_root)
@@ -2445,6 +2790,7 @@ class ArtifactService:
2445
2790
  resolved_config_paths = [str(item).strip() for item in (config_paths or []) if str(item).strip()]
2446
2791
  resolved_notes = [str(item).strip() for item in (notes or []) if str(item).strip()]
2447
2792
  normalized_dataset_scope = str(dataset_scope or "full").strip().lower() or "full"
2793
+ normalized_evaluation_summary = self._normalize_evaluation_summary(evaluation_summary)
2448
2794
  primary = comparisons.get("primary") if isinstance(comparisons, dict) else {}
2449
2795
  primary_metric_id = str(progress_eval.get("primary_metric_id") or comparisons.get("primary_metric_id") or "").strip() or None
2450
2796
  primary_value = primary.get("run_value") if isinstance(primary, dict) else None
@@ -2554,6 +2900,8 @@ class ArtifactService:
2554
2900
  if resolved_notes:
2555
2901
  run_lines.extend(["", "## Notes", ""])
2556
2902
  run_lines.extend([f"- {item}" for item in resolved_notes])
2903
+ run_lines.extend(["", "## Evaluation Summary", ""])
2904
+ run_lines.extend(self._evaluation_summary_markdown_lines(normalized_evaluation_summary))
2557
2905
  run_lines.extend(
2558
2906
  [
2559
2907
  "",
@@ -2602,6 +2950,7 @@ class ArtifactService:
2602
2950
  key: value for key, value in comparisons.items() if key != "primary"
2603
2951
  },
2604
2952
  "progress_eval": progress_eval,
2953
+ "evaluation_summary": normalized_evaluation_summary,
2605
2954
  "delivery_policy": delivery_policy,
2606
2955
  "startup_contract": delivery_policy.get("startup_contract") or None,
2607
2956
  "evidence_paths": resolved_evidence_paths,
@@ -2642,6 +2991,7 @@ class ArtifactService:
2642
2991
  "recommended_next_route": delivery_policy.get("recommended_next_route"),
2643
2992
  "changed_file_count": len(resolved_changed_files),
2644
2993
  "evidence_count": len(resolved_evidence_paths),
2994
+ "evaluation_summary": normalized_evaluation_summary,
2645
2995
  },
2646
2996
  "delivery_policy": delivery_policy,
2647
2997
  "startup_contract": delivery_policy.get("startup_contract") or None,
@@ -2657,6 +3007,7 @@ class ArtifactService:
2657
3007
  key: value for key, value in comparisons.items() if key != "primary"
2658
3008
  },
2659
3009
  "progress_eval": progress_eval,
3010
+ "evaluation_summary": normalized_evaluation_summary,
2660
3011
  "files_changed": resolved_changed_files,
2661
3012
  "evidence_paths": resolved_evidence_paths,
2662
3013
  "verdict": verdict,
@@ -2693,6 +3044,7 @@ class ArtifactService:
2693
3044
  "breakthrough_level": progress_eval.get("breakthrough_level"),
2694
3045
  "need_research_paper": delivery_policy.get("need_research_paper"),
2695
3046
  "recommended_next_route": delivery_policy.get("recommended_next_route"),
3047
+ "evaluation_summary": normalized_evaluation_summary,
2696
3048
  }
2697
3049
  ],
2698
3050
  )
@@ -2715,6 +3067,7 @@ class ArtifactService:
2715
3067
  key: value for key, value in comparisons.items() if key != "primary"
2716
3068
  },
2717
3069
  "progress_eval": progress_eval,
3070
+ "evaluation_summary": normalized_evaluation_summary,
2718
3071
  "delivery_policy": delivery_policy,
2719
3072
  }
2720
3073
 
@@ -2752,6 +3105,7 @@ class ArtifactService:
2752
3105
  normalized_experimental_designs = self._normalize_string_list(experimental_designs)
2753
3106
  normalized_todo_items = self._normalize_campaign_todo_items(todo_items)
2754
3107
  slice_contexts: list[dict[str, Any]] = []
3108
+ inventory_entries: list[dict[str, Any]] = []
2755
3109
  for index, raw in enumerate(slices, start=1):
2756
3110
  slice_id = str(raw.get("slice_id") or generate_id("slice")).strip()
2757
3111
  title = str(raw.get("title") or slice_id).strip() or slice_id
@@ -2783,6 +3137,10 @@ class ArtifactService:
2783
3137
  abandonment_criteria = str(
2784
3138
  raw.get("abandonment_criteria") or matched_todo.get("abandonment_criteria") or ""
2785
3139
  ).strip()
3140
+ required_baselines = self._normalize_required_baselines(
3141
+ quest_root,
3142
+ raw.get("required_baselines") or matched_todo.get("required_baselines"),
3143
+ )
2786
3144
  plan_dir = ensure_dir(worktree_root / "experiments" / "analysis" / campaign_id / slice_id)
2787
3145
  plan_path = plan_dir / "plan.md"
2788
3146
  requirement_lines = [
@@ -2812,33 +3170,44 @@ class ArtifactService:
2812
3170
  "",
2813
3171
  str(raw.get("required_changes") or "").strip() or "TBD",
2814
3172
  "",
2815
- "## Metric Contract",
2816
- "",
2817
- str(raw.get("metric_contract") or "").strip() or "TBD",
2818
- "",
2819
- "## Environment Notes",
2820
- "",
2821
- str(raw.get("environment_notes") or "").strip() or "TBD",
2822
- "",
2823
- "## Must Not Simplify",
2824
- "",
2825
- str(raw.get("must_not_simplify") or "").strip() or "Full dataset / full protocol only unless explicitly approved.",
2826
- "",
2827
- "## Success Criteria",
2828
- "",
2829
- success_criteria or "TBD",
2830
- "",
2831
- "## Abandonment Criteria",
2832
- "",
2833
- abandonment_criteria or "TBD",
2834
- "",
2835
- "## Completion Condition",
2836
- "",
2837
- str(raw.get("completion_condition") or matched_todo.get("completion_condition") or "").strip()
2838
- or str(raw.get("must_not_simplify") or matched_todo.get("must_not_simplify") or "").strip()
2839
- or "Complete the planned analysis slice and mirror the durable result back to the parent branch.",
3173
+ "## Required Baselines",
2840
3174
  "",
2841
3175
  ]
3176
+ if required_baselines:
3177
+ requirement_lines.extend([f"- {self._analysis_baseline_label(item)}" for item in required_baselines])
3178
+ else:
3179
+ requirement_lines.append("- None recorded.")
3180
+ requirement_lines.extend(
3181
+ [
3182
+ "",
3183
+ "## Metric Contract",
3184
+ "",
3185
+ str(raw.get("metric_contract") or "").strip() or "TBD",
3186
+ "",
3187
+ "## Environment Notes",
3188
+ "",
3189
+ str(raw.get("environment_notes") or "").strip() or "TBD",
3190
+ "",
3191
+ "## Must Not Simplify",
3192
+ "",
3193
+ str(raw.get("must_not_simplify") or "").strip() or "Full dataset / full protocol only unless explicitly approved.",
3194
+ "",
3195
+ "## Success Criteria",
3196
+ "",
3197
+ success_criteria or "TBD",
3198
+ "",
3199
+ "## Abandonment Criteria",
3200
+ "",
3201
+ abandonment_criteria or "TBD",
3202
+ "",
3203
+ "## Completion Condition",
3204
+ "",
3205
+ str(raw.get("completion_condition") or matched_todo.get("completion_condition") or "").strip()
3206
+ or str(raw.get("must_not_simplify") or matched_todo.get("must_not_simplify") or "").strip()
3207
+ or "Complete the planned analysis slice and mirror the durable result back to the parent branch.",
3208
+ "",
3209
+ ]
3210
+ )
2842
3211
  requirement_lines.extend(["## Reviewer Item IDs", ""])
2843
3212
  if reviewer_item_ids:
2844
3213
  requirement_lines.extend([f"- `{item}`" for item in reviewer_item_ids])
@@ -2879,9 +3248,31 @@ class ArtifactService:
2879
3248
  "completion_condition": str(
2880
3249
  raw.get("completion_condition") or matched_todo.get("completion_condition") or ""
2881
3250
  ).strip(),
3251
+ "required_baselines": required_baselines,
2882
3252
  "reviewer_item_ids": reviewer_item_ids,
2883
3253
  "manuscript_targets": manuscript_targets,
2884
3254
  }
3255
+ )
3256
+ inventory_entries.extend(
3257
+ [
3258
+ {
3259
+ "baseline_id": item.get("baseline_id"),
3260
+ "variant_id": item.get("variant_id"),
3261
+ "usage_scope": "supplementary",
3262
+ "status": "required",
3263
+ "reason": item.get("reason"),
3264
+ "benchmark": item.get("benchmark"),
3265
+ "split": item.get("split"),
3266
+ "baseline_root_rel_path": item.get("baseline_root_rel_path"),
3267
+ "storage_mode": item.get("storage_mode"),
3268
+ "origin": {
3269
+ "stage": "analysis_campaign",
3270
+ "campaign_id": campaign_id,
3271
+ "slice_id": slice_id,
3272
+ },
3273
+ }
3274
+ for item in required_baselines
3275
+ ]
2885
3276
  )
2886
3277
 
2887
3278
  todo_manifest = {
@@ -2903,6 +3294,7 @@ class ArtifactService:
2903
3294
  "why_now": item.get("why_now") or context.get("why_now"),
2904
3295
  "success_criteria": item.get("success_criteria") or context.get("success_criteria"),
2905
3296
  "abandonment_criteria": item.get("abandonment_criteria") or context.get("abandonment_criteria"),
3297
+ "required_baselines": item.get("required_baselines") or context.get("required_baselines") or [],
2906
3298
  "reviewer_item_ids": item.get("reviewer_item_ids") or context.get("reviewer_item_ids") or [],
2907
3299
  "manuscript_targets": item.get("manuscript_targets") or context.get("manuscript_targets") or [],
2908
3300
  }
@@ -2957,6 +3349,7 @@ class ArtifactService:
2957
3349
  f"- Research question: {item['research_question'] or 'TBD'}",
2958
3350
  f"- Experimental design: {item['experimental_design'] or 'TBD'}",
2959
3351
  f"- Why now: {item['why_now'] or 'TBD'}",
3352
+ f"- Required baselines: {', '.join(self._analysis_baseline_label(entry) for entry in item['required_baselines']) or 'none'}",
2960
3353
  f"- Success criteria: {item['success_criteria'] or 'TBD'}",
2961
3354
  f"- Abandonment criteria: {item['abandonment_criteria'] or 'TBD'}",
2962
3355
  f"- Completion condition: {item['completion_condition'] or item['must_not_simplify'] or 'TBD'}",
@@ -3017,6 +3410,7 @@ class ArtifactService:
3017
3410
  "why_now": item["why_now"],
3018
3411
  "completion_condition": item["completion_condition"] or item["must_not_simplify"],
3019
3412
  "must_not_simplify": item["must_not_simplify"],
3413
+ "required_baselines": item["required_baselines"],
3020
3414
  "success_criteria": item["success_criteria"],
3021
3415
  "abandonment_criteria": item["abandonment_criteria"],
3022
3416
  "reviewer_item_ids": item["reviewer_item_ids"],
@@ -3065,6 +3459,7 @@ class ArtifactService:
3065
3459
  "why_now": item["why_now"],
3066
3460
  "completion_condition": item["completion_condition"] or item["must_not_simplify"],
3067
3461
  "must_not_simplify": item["must_not_simplify"],
3462
+ "required_baselines": item["required_baselines"],
3068
3463
  "success_criteria": item["success_criteria"],
3069
3464
  "abandonment_criteria": item["abandonment_criteria"],
3070
3465
  "reviewer_item_ids": item["reviewer_item_ids"],
@@ -3089,6 +3484,7 @@ class ArtifactService:
3089
3484
  workspace_mode="analysis",
3090
3485
  last_flow_type="analysis_campaign",
3091
3486
  )
3487
+ baseline_inventory = self._upsert_analysis_baseline_inventory(quest_root, inventory_entries) if inventory_entries else None
3092
3488
  self.quest_service.update_settings(self._quest_id(quest_root), active_anchor="analysis-campaign")
3093
3489
  checkpoint_result = self._checkpoint_with_optional_push(
3094
3490
  parent_worktree_root,
@@ -3138,6 +3534,7 @@ class ArtifactService:
3138
3534
  "charter_path": str(charter_path),
3139
3535
  "slices": slice_contexts,
3140
3536
  "manifest": manifest,
3537
+ "analysis_baseline_inventory": baseline_inventory,
3141
3538
  "todo_manifest_path": str(todo_manifest_path),
3142
3539
  "artifact": artifact,
3143
3540
  "checkpoint": checkpoint_result,
@@ -3320,6 +3717,18 @@ class ArtifactService:
3320
3717
  raise ValueError("submit_paper_bundle requires a selected outline or explicit `outline_path`.")
3321
3718
 
3322
3719
  manifest_path = self._paper_bundle_manifest_path(quest_root)
3720
+ baseline_inventory = self._write_paper_baseline_inventory(quest_root)
3721
+ baseline_inventory_path = self._paper_baseline_inventory_path(quest_root)
3722
+ source_branch = (
3723
+ str(self.quest_service.read_research_state(quest_root).get("current_workspace_branch") or "").strip()
3724
+ or current_branch(self._workspace_root_for(quest_root))
3725
+ )
3726
+ open_source_manifest = self._ensure_open_source_prep(
3727
+ quest_root,
3728
+ source_branch=source_branch,
3729
+ source_bundle_manifest_path="paper/paper_bundle_manifest.json",
3730
+ baseline_inventory_path="paper/baseline_inventory.json",
3731
+ )
3323
3732
  manifest = {
3324
3733
  "schema_version": 1,
3325
3734
  "title": str(
@@ -3338,6 +3747,10 @@ class ArtifactService:
3338
3747
  "compile_report_path": str(compile_report_path or "paper/build/compile_report.json").strip() or None,
3339
3748
  "pdf_path": str(pdf_path or "").strip() or None,
3340
3749
  "latex_root_path": str(latex_root_path or "").strip() or None,
3750
+ "baseline_inventory_path": "paper/baseline_inventory.json",
3751
+ "open_source_manifest_path": "release/open_source/manifest.json",
3752
+ "open_source_cleanup_plan_path": str(open_source_manifest.get("cleanup_plan_path") or "").strip()
3753
+ or "release/open_source/cleanup_plan.md",
3341
3754
  "selected_outline_ref": str(selected_outline.get("outline_id") or "").strip() or None,
3342
3755
  "created_at": utc_now(),
3343
3756
  "updated_at": utc_now(),
@@ -3359,10 +3772,14 @@ class ArtifactService:
3359
3772
  "outline_path": manifest.get("outline_path"),
3360
3773
  "draft_path": manifest.get("draft_path"),
3361
3774
  "pdf_path": manifest.get("pdf_path"),
3775
+ "baseline_inventory_path": str(baseline_inventory_path),
3776
+ "open_source_manifest_path": str(self._open_source_manifest_path(quest_root)),
3362
3777
  },
3363
3778
  "details": {
3364
3779
  "title": manifest.get("title"),
3365
3780
  "selected_outline_ref": manifest.get("selected_outline_ref"),
3781
+ "baseline_inventory_count": len(baseline_inventory.get("supplementary_baselines") or []),
3782
+ "open_source_status": open_source_manifest.get("status"),
3366
3783
  },
3367
3784
  },
3368
3785
  checkpoint=False,
@@ -3372,6 +3789,8 @@ class ArtifactService:
3372
3789
  "ok": True,
3373
3790
  "manifest_path": str(manifest_path),
3374
3791
  "manifest": manifest,
3792
+ "baseline_inventory_path": str(baseline_inventory_path),
3793
+ "open_source_manifest_path": str(self._open_source_manifest_path(quest_root)),
3375
3794
  "artifact": artifact,
3376
3795
  }
3377
3796
 
@@ -3394,6 +3813,8 @@ class ArtifactService:
3394
3813
  next_recommendation: str | None = None,
3395
3814
  dataset_scope: str = "full",
3396
3815
  subset_approval_ref: str | None = None,
3816
+ comparison_baselines: list[dict[str, Any]] | None = None,
3817
+ evaluation_summary: dict[str, Any] | None = None,
3397
3818
  ) -> dict[str, Any]:
3398
3819
  state = self.quest_service.read_research_state(quest_root)
3399
3820
  manifest = self._read_analysis_manifest(quest_root, campaign_id)
@@ -3408,16 +3829,19 @@ class ArtifactService:
3408
3829
  evidence_paths = [str(item).strip() for item in (evidence_paths or []) if str(item).strip()]
3409
3830
  deviations = [str(item).strip() for item in (deviations or []) if str(item).strip()]
3410
3831
  metric_rows = [item for item in (metric_rows or []) if isinstance(item, dict)]
3832
+ normalized_comparison_baselines = self._normalize_comparison_baselines(quest_root, comparison_baselines)
3411
3833
  normalized_claim_impact = str(claim_impact or "").strip() or None
3412
3834
  normalized_reviewer_resolution = str(reviewer_resolution or "").strip() or None
3413
3835
  normalized_manuscript_update_hint = str(manuscript_update_hint or "").strip() or None
3414
3836
  normalized_next_recommendation = str(next_recommendation or "").strip() or None
3837
+ normalized_evaluation_summary = self._normalize_evaluation_summary(evaluation_summary)
3415
3838
  slice_worktree_root = Path(str(target.get("worktree_root") or ""))
3416
3839
  parent_worktree_root = Path(str(manifest.get("parent_worktree_root") or ""))
3417
3840
  parent_branch = str(manifest.get("parent_branch") or "")
3418
3841
 
3419
3842
  result_dir = ensure_dir(slice_worktree_root / "experiments" / "analysis" / campaign_id / slice_id)
3420
3843
  result_path = result_dir / "RESULT.md"
3844
+ result_json_path = result_dir / "RESULT.json"
3421
3845
  result_lines = [
3422
3846
  f"# {target.get('title') or slice_id}",
3423
3847
  "",
@@ -3456,6 +3880,10 @@ class ArtifactService:
3456
3880
  "",
3457
3881
  normalized_next_recommendation or "Not recorded.",
3458
3882
  "",
3883
+ "## Evaluation Summary",
3884
+ "",
3885
+ *self._evaluation_summary_markdown_lines(normalized_evaluation_summary),
3886
+ "",
3459
3887
  "## Deviations",
3460
3888
  "",
3461
3889
  ]
@@ -3472,6 +3900,20 @@ class ArtifactService:
3472
3900
  result_lines.extend(["", "## Metric Rows", ""])
3473
3901
  for row in metric_rows:
3474
3902
  result_lines.append(f"- `{row}`")
3903
+ result_lines.extend(["", "## Comparison Baselines", ""])
3904
+ if normalized_comparison_baselines:
3905
+ for entry in normalized_comparison_baselines:
3906
+ result_lines.append(f"- {self._analysis_baseline_label(entry)}")
3907
+ if entry.get("baseline_root_rel_path"):
3908
+ result_lines.append(f" - Root: `{entry['baseline_root_rel_path']}`")
3909
+ if entry.get("metrics_summary"):
3910
+ result_lines.append(f" - Metrics: `{entry['metrics_summary']}`")
3911
+ if entry.get("published"):
3912
+ result_lines.append(
3913
+ f" - Published: `{entry.get('published_entry_id') or entry.get('baseline_id')}`"
3914
+ )
3915
+ else:
3916
+ result_lines.append("- None recorded.")
3475
3917
  if subset_approval_ref:
3476
3918
  result_lines.extend(["", "## Subset Approval", "", f"`{subset_approval_ref}`"])
3477
3919
  write_text(result_path, "\n".join(result_lines).rstrip() + "\n")
@@ -3486,6 +3928,37 @@ class ArtifactService:
3486
3928
  if len(keys) == 1:
3487
3929
  metrics_summary[keys[0]] = row.get(keys[0])
3488
3930
 
3931
+ result_payload = {
3932
+ "schema_version": 1,
3933
+ "result_kind": "analysis_slice",
3934
+ "campaign_id": campaign_id,
3935
+ "slice_id": slice_id,
3936
+ "status": status,
3937
+ "title": target.get("title"),
3938
+ "goal": target.get("goal"),
3939
+ "run_kind": target.get("run_kind"),
3940
+ "required_baselines": target.get("required_baselines") or [],
3941
+ "comparison_baselines": normalized_comparison_baselines,
3942
+ "metrics_summary": metrics_summary,
3943
+ "metric_rows": metric_rows,
3944
+ "dataset_scope": normalized_scope,
3945
+ "subset_approval_ref": subset_approval_ref,
3946
+ "setup": setup.strip() or None,
3947
+ "execution": execution.strip() or None,
3948
+ "results": results.strip() or None,
3949
+ "claim_impact": normalized_claim_impact,
3950
+ "reviewer_resolution": normalized_reviewer_resolution,
3951
+ "manuscript_update_hint": normalized_manuscript_update_hint,
3952
+ "next_recommendation": normalized_next_recommendation,
3953
+ "evaluation_summary": normalized_evaluation_summary,
3954
+ "deviations": deviations,
3955
+ "evidence_paths": evidence_paths,
3956
+ "source_branch": str(target.get("branch") or ""),
3957
+ "source_worktree_root": str(slice_worktree_root),
3958
+ "updated_at": utc_now(),
3959
+ }
3960
+ write_json(result_json_path, result_payload)
3961
+
3489
3962
  mirror_dir = ensure_dir(parent_worktree_root / "experiments" / "analysis-results" / campaign_id)
3490
3963
  mirror_path = mirror_dir / f"{slice_id}.md"
3491
3964
  mirror_lines = [
@@ -3524,7 +3997,17 @@ class ArtifactService:
3524
3997
  "",
3525
3998
  normalized_manuscript_update_hint or "Not recorded.",
3526
3999
  "",
4000
+ "## Evaluation Summary",
4001
+ "",
4002
+ *self._evaluation_summary_markdown_lines(normalized_evaluation_summary),
4003
+ "",
3527
4004
  ]
4005
+ mirror_lines.extend(["## Comparison Baselines", ""])
4006
+ if normalized_comparison_baselines:
4007
+ mirror_lines.extend([f"- {self._analysis_baseline_label(entry)}" for entry in normalized_comparison_baselines])
4008
+ else:
4009
+ mirror_lines.append("- None recorded.")
4010
+ mirror_lines.append("")
3528
4011
  write_text(mirror_path, "\n".join(mirror_lines).rstrip() + "\n")
3529
4012
 
3530
4013
  artifact = self.record(
@@ -3548,6 +4031,7 @@ class ArtifactService:
3548
4031
  "protocol_step": "record",
3549
4032
  "paths": {
3550
4033
  "slice_result_md": str(result_path),
4034
+ "slice_result_json": str(result_json_path),
3551
4035
  "parent_result_md": str(mirror_path),
3552
4036
  },
3553
4037
  "details": {
@@ -3563,7 +4047,11 @@ class ArtifactService:
3563
4047
  "next_recommendation": normalized_next_recommendation,
3564
4048
  "deviations": deviations,
3565
4049
  "evidence_paths": evidence_paths,
4050
+ "required_baselines": target.get("required_baselines") or [],
4051
+ "comparison_baselines": normalized_comparison_baselines,
4052
+ "evaluation_summary": normalized_evaluation_summary,
3566
4053
  },
4054
+ "evaluation_summary": normalized_evaluation_summary,
3567
4055
  },
3568
4056
  checkpoint=False,
3569
4057
  workspace_root=slice_worktree_root,
@@ -3586,11 +4074,14 @@ class ArtifactService:
3586
4074
  updated["status"] = status
3587
4075
  updated["completed_at"] = utc_now()
3588
4076
  updated["result_path"] = str(result_path)
4077
+ updated["result_json_path"] = str(result_json_path)
3589
4078
  updated["mirror_path"] = str(mirror_path)
3590
4079
  updated["claim_impact"] = normalized_claim_impact
3591
4080
  updated["reviewer_resolution"] = normalized_reviewer_resolution
3592
4081
  updated["manuscript_update_hint"] = normalized_manuscript_update_hint
3593
4082
  updated["next_recommendation"] = normalized_next_recommendation
4083
+ updated["comparison_baselines"] = normalized_comparison_baselines
4084
+ updated["evaluation_summary"] = normalized_evaluation_summary
3594
4085
  updated_slices.append(updated)
3595
4086
  next_slice = next((item for item in updated_slices if str(item.get("status") or "") == "pending"), None)
3596
4087
  manifest = self._write_analysis_manifest(
@@ -3601,6 +4092,36 @@ class ArtifactService:
3601
4092
  "slices": updated_slices,
3602
4093
  },
3603
4094
  )
4095
+ baseline_inventory = (
4096
+ self._upsert_analysis_baseline_inventory(
4097
+ quest_root,
4098
+ [
4099
+ {
4100
+ "baseline_id": entry.get("baseline_id"),
4101
+ "variant_id": entry.get("variant_id"),
4102
+ "usage_scope": "supplementary",
4103
+ "status": "registered",
4104
+ "reason": entry.get("reason"),
4105
+ "benchmark": entry.get("benchmark"),
4106
+ "split": entry.get("split"),
4107
+ "baseline_root_rel_path": entry.get("baseline_root_rel_path"),
4108
+ "storage_mode": entry.get("storage_mode"),
4109
+ "metrics_summary": entry.get("metrics_summary"),
4110
+ "evidence_paths": entry.get("evidence_paths"),
4111
+ "published": entry.get("published"),
4112
+ "published_entry_id": entry.get("published_entry_id"),
4113
+ "origin": {
4114
+ "stage": "analysis_campaign",
4115
+ "campaign_id": campaign_id,
4116
+ "slice_id": slice_id,
4117
+ },
4118
+ }
4119
+ for entry in normalized_comparison_baselines
4120
+ ],
4121
+ )
4122
+ if normalized_comparison_baselines
4123
+ else self._read_analysis_baseline_inventory(quest_root)
4124
+ )
3604
4125
 
3605
4126
  if next_slice is not None:
3606
4127
  research_state = self.quest_service.update_research_state(
@@ -3642,14 +4163,17 @@ class ArtifactService:
3642
4163
  "slice_id": slice_id,
3643
4164
  "status": status,
3644
4165
  "result_path": str(result_path),
4166
+ "result_json_path": str(result_json_path),
3645
4167
  "mirror_path": str(mirror_path),
3646
4168
  "artifact": artifact,
3647
4169
  "slice_checkpoint": slice_checkpoint,
3648
4170
  "parent_checkpoint": parent_checkpoint,
3649
4171
  "next_slice": next_slice,
3650
4172
  "manifest": manifest,
4173
+ "analysis_baseline_inventory": baseline_inventory,
3651
4174
  "interaction": interaction,
3652
4175
  "research_state": research_state,
4176
+ "evaluation_summary": normalized_evaluation_summary,
3653
4177
  "completed": False,
3654
4178
  }
3655
4179
 
@@ -3744,6 +4268,7 @@ class ArtifactService:
3744
4268
  "slice_id": slice_id,
3745
4269
  "status": status,
3746
4270
  "result_path": str(result_path),
4271
+ "result_json_path": str(result_json_path),
3747
4272
  "mirror_path": str(mirror_path),
3748
4273
  "artifact": artifact,
3749
4274
  "slice_checkpoint": slice_checkpoint,
@@ -3752,8 +4277,10 @@ class ArtifactService:
3752
4277
  "summary_checkpoint": parent_summary_checkpoint,
3753
4278
  "summary_path": str(summary_path),
3754
4279
  "manifest": manifest,
4280
+ "analysis_baseline_inventory": baseline_inventory,
3755
4281
  "interaction": interaction,
3756
4282
  "research_state": research_state,
4283
+ "evaluation_summary": normalized_evaluation_summary,
3757
4284
  "completed": True,
3758
4285
  "returned_to_branch": parent_branch,
3759
4286
  "returned_to_worktree_root": str(parent_worktree_root),
@@ -4660,7 +5187,7 @@ class ArtifactService:
4660
5187
  return targets
4661
5188
 
4662
5189
  def _connectors_config(self) -> dict[str, Any]:
4663
- return ConfigManager(self.home).load_named("connectors")
5190
+ return ConfigManager(self.home).load_named_normalized("connectors")
4664
5191
 
4665
5192
  @staticmethod
4666
5193
  def _delivery_policy(connectors: dict[str, Any]) -> str: