claude-turing 4.3.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,510 @@
+ #!/usr/bin/env python3
+ """Automated failure postmortem for the autoresearch pipeline.
+ 
+ When experiments stop improving, diagnoses the root cause: search space
+ exhaustion, systematic config error, data issue, metric ceiling, or
+ noise floor. Produces actionable next steps.
+ 
+ Usage:
+     python scripts/failure_postmortem.py
+     python scripts/failure_postmortem.py --window 10
+     python scripts/failure_postmortem.py --auto-trigger 5
+     python scripts/failure_postmortem.py --json
+ """
+ 
+ from __future__ import annotations
+ 
+ import argparse
+ import json
+ from datetime import datetime, timezone
+ from pathlib import Path
+ 
+ import numpy as np
+ import yaml
+ 
+ from scripts.turing_io import load_config, load_experiments
+ 
+ DEFAULT_LOG_PATH = "experiments/log.jsonl"
+ DEFAULT_WINDOW = 10
+ DEFAULT_AUTO_TRIGGER = 5
+ 
+ DIAGNOSIS_TYPES = [
+     "search_space_exhaustion",
+     "systematic_config_error",
+     "data_issue",
+     "metric_ceiling",
+     "noise_floor",
+ ]
+ 
+ 
+ # --- Streak Detection ---
+ 
+ 
+ def detect_failure_streak(
+     experiments: list[dict],
+     primary_metric: str,
+     lower_is_better: bool = False,
+ ) -> dict:
+     """Detect how many consecutive experiments failed to improve.
+ 
+     Returns:
+         Streak info with count, best metric, streak experiments.
+     """
+     if not experiments:
+         return {"streak_length": 0, "best_metric": None, "streak_experiments": []}
+ 
+     # Find the best metric value
+     best_val = None
+     for exp in experiments:
+         val = exp.get("metrics", {}).get(primary_metric)
+         if val is None:
+             continue
+         if best_val is None:
+             best_val = val
+         elif (lower_is_better and val < best_val) or (not lower_is_better and val > best_val):
+             best_val = val
+ 
+     if best_val is None:
+         return {"streak_length": len(experiments), "best_metric": None, "streak_experiments": experiments}
+ 
+     # Count consecutive non-improvements from the end
+     streak = []
+     best_so_far = None
+ 
+     for exp in experiments:
+         val = exp.get("metrics", {}).get(primary_metric)
+         if val is None:
+             continue
+         if best_so_far is None:
+             best_so_far = val
+         elif (lower_is_better and val < best_so_far) or (not lower_is_better and val > best_so_far):
+             best_so_far = val
+             streak = []  # Reset streak on improvement
+ 
+         streak.append(exp)
+ 
+     # The streak runs from the last improvement to the end; drop the
+     # improving experiment itself if it leads the streak
+     if streak and streak[0].get("metrics", {}).get(primary_metric) == best_so_far:
+         streak = streak[1:]
+ 
+     return {
+         "streak_length": len(streak),
+         "best_metric": best_val,
+         "streak_experiments": streak,
+     }
+ 
+ 
+ # --- Diagnosis Functions ---
+ 
+ 
+ def diagnose_search_space_exhaustion(
+     streak_experiments: list[dict],
+     primary_metric: str,
+ ) -> dict:
+     """Check if experiments cluster in a small config region."""
+     if len(streak_experiments) < 3:
+         return {"score": 0, "evidence": "Too few experiments for diagnosis"}
+ 
+     # Extract hyperparameters from streak
+     all_params = {}
+     for exp in streak_experiments:
+         config = exp.get("config", {})
+         hyperparams = config.get("hyperparams", {})
+         for k, v in hyperparams.items():
+             if isinstance(v, (int, float)) and not isinstance(v, bool):
+                 all_params.setdefault(k, []).append(float(v))
+ 
+     if not all_params:
+         return {"score": 0, "evidence": "No numeric hyperparameters found"}
+ 
+     # Measure coefficient of variation for each param
+     low_variance_params = []
+     for param, values in all_params.items():
+         if len(values) < 2:
+             continue
+         mean = np.mean(values)
+         if abs(mean) < 1e-10:
+             continue
+         cv = np.std(values) / abs(mean)
+         if cv < 0.15:  # Less than 15% variation
+             low_variance_params.append({"param": param, "cv": round(float(cv), 4), "mean": round(float(mean), 4)})
+ 
+     # Check family diversity
+     families = set()
+     for exp in streak_experiments:
+         family = exp.get("family", exp.get("config", {}).get("family", "unknown"))
+         families.add(family)
+ 
+     score = 0
+     evidence = []
+ 
+     if len(low_variance_params) > len(all_params) * 0.5:
+         score += 0.4
+         evidence.append(f"Config variance LOW: {len(low_variance_params)}/{len(all_params)} params within ±15%")
+ 
+     if len(families) <= 1:
+         score += 0.3
+         evidence.append(f"All experiments in same family: {families}")
+ 
+     # Check if metrics are clustered
+     metrics = [exp.get("metrics", {}).get(primary_metric) for exp in streak_experiments
+                if exp.get("metrics", {}).get(primary_metric) is not None]
+     if len(metrics) >= 2:
+         metric_cv = np.std(metrics) / abs(np.mean(metrics)) if abs(np.mean(metrics)) > 0 else 0
+         if metric_cv < 0.02:
+             score += 0.3
+             evidence.append(f"Metric range very tight (CV={metric_cv:.4f})")
+ 
+     return {
+         "score": round(score, 2),
+         "evidence": evidence if evidence else ["No strong evidence of exhaustion"],
+         "low_variance_params": low_variance_params,
+         "families": list(families),
+     }
+ 
+ 
+ def diagnose_systematic_config_error(
+     streak_experiments: list[dict],
+     primary_metric: str,
+     best_metric: float | None,
+ ) -> dict:
+     """Check if all experiments share a common bad config."""
+     if len(streak_experiments) < 3:
+         return {"score": 0, "evidence": "Too few experiments"}
+ 
+     # Find params that are identical across all streak experiments
+     common_params = {}
+     first_config = streak_experiments[0].get("config", {}).get("hyperparams", {})
+ 
+     for k, v in first_config.items():
+         if not isinstance(v, (int, float, str)):
+             continue
+         all_same = all(
+             exp.get("config", {}).get("hyperparams", {}).get(k) == v
+             for exp in streak_experiments[1:]
+         )
+         if all_same:
+             common_params[k] = v
+ 
+     score = 0
+     evidence = []
+ 
+     if common_params:
+         ratio = len(common_params) / max(len(first_config), 1)
+         if ratio > 0.5:
+             score += 0.5
+             evidence.append(f"{len(common_params)} params unchanged across all {len(streak_experiments)} experiments")
+             evidence.append(f"Common: {common_params}")
+ 
+     # Check if all experiments are significantly worse than best
+     if best_metric is not None:
+         streak_metrics = [exp.get("metrics", {}).get(primary_metric) for exp in streak_experiments
+                           if exp.get("metrics", {}).get(primary_metric) is not None]
+         if streak_metrics:
+             avg_gap = abs(np.mean(streak_metrics) - best_metric)
+             if avg_gap > 0.02:
+                 score += 0.3
+                 evidence.append(f"Average gap from best: {avg_gap:.4f}")
+ 
+     return {"score": round(score, 2), "evidence": evidence or ["No common config error detected"], "common_params": common_params}
+ 
+ 
+ def diagnose_data_issue(
+     streak_experiments: list[dict],
+     primary_metric: str,
+ ) -> dict:
+     """Check if all models fail similarly regardless of type."""
+     if len(streak_experiments) < 3:
+         return {"score": 0, "evidence": "Too few experiments"}
+ 
+     # Check model type diversity
+     model_types = set()
+     for exp in streak_experiments:
+         mt = exp.get("config", {}).get("model_type", "unknown")
+         model_types.add(mt)
+ 
+     score = 0
+     evidence = []
+ 
+     # If multiple model types all fail similarly → data issue
+     if len(model_types) >= 2:
+         metrics = [exp.get("metrics", {}).get(primary_metric) for exp in streak_experiments
+                    if exp.get("metrics", {}).get(primary_metric) is not None]
+         if len(metrics) >= 2:
+             cv = np.std(metrics) / abs(np.mean(metrics)) if abs(np.mean(metrics)) > 0 else 0
+             if cv < 0.03:
+                 score += 0.6
+                 evidence.append(f"{len(model_types)} different model types all perform similarly (CV={cv:.4f})")
+                 evidence.append(f"Model types: {model_types}")
+ 
+     return {"score": round(score, 2), "evidence": evidence or ["No data issue pattern detected"], "model_types": list(model_types)}
+ 
+ 
+ def diagnose_metric_ceiling(
+     streak_experiments: list[dict],
+     primary_metric: str,
+     best_metric: float | None,
+ ) -> dict:
+     """Check if metrics are plateauing near a theoretical limit."""
+     if best_metric is None:
+         return {"score": 0, "evidence": "No best metric available"}
+ 
+     score = 0
+     evidence = []
+ 
+     # Check if best metric is very high (suggesting ceiling); assumes a
+     # bounded, higher-is-better metric such as accuracy in [0, 1]
+     if best_metric > 0.95:
+         score += 0.4
+         evidence.append(f"Current best {primary_metric}={best_metric:.4f} — near theoretical maximum")
+ 
+     # Check improvement rate (are improvements getting tiny?)
+     metrics = sorted([
+         exp.get("metrics", {}).get(primary_metric)
+         for exp in streak_experiments
+         if exp.get("metrics", {}).get(primary_metric) is not None
+     ])
+     if len(metrics) >= 3:
+         range_val = max(metrics) - min(metrics)
+         if range_val < 0.005:
+             score += 0.3
+             evidence.append(f"Metric range in streak: {range_val:.4f} (< 0.005)")
+ 
+     return {"score": round(score, 2), "evidence": evidence or ["No ceiling pattern detected"]}
+ 
+ 
+ def diagnose_noise_floor(
+     streak_experiments: list[dict],
+     primary_metric: str,
+     seed_dir: str = "experiments/seed_studies",
+ ) -> dict:
+     """Check if improvements are within seed variance."""
+     score = 0
+     evidence = []
+ 
+     # Check seed study data for a variance estimate; lexically later
+     # study files override earlier ones
+     seed_path = Path(seed_dir)
+     seed_variance = None
+     if seed_path.exists():
+         for f in sorted(seed_path.glob("*.yaml")):
+             try:
+                 with open(f) as fh:
+                     data = yaml.safe_load(fh)
+                 if isinstance(data, dict) and "std" in data:
+                     seed_variance = data["std"]
+             except (yaml.YAMLError, OSError):
+                 continue
+ 
+     metrics = [exp.get("metrics", {}).get(primary_metric) for exp in streak_experiments
+                if exp.get("metrics", {}).get(primary_metric) is not None]
+ 
+     if len(metrics) >= 2:
+         streak_range = max(metrics) - min(metrics)
+         if seed_variance is not None:
+             if streak_range < seed_variance * 2:
+                 score += 0.7
+                 evidence.append(f"Streak range ({streak_range:.4f}) < 2x seed std ({seed_variance:.4f})")
+         else:
+             streak_std = float(np.std(metrics))
+             if streak_std < 0.005:
+                 score += 0.3
+                 evidence.append(f"Streak std ({streak_std:.4f}) very low — may be noise")
+ 
+     return {"score": round(score, 2), "evidence": evidence or ["No noise floor pattern detected"], "seed_variance": seed_variance}
+ 
+ 
+ # --- Main Pipeline ---
+ 
+ 
+ def run_postmortem(
+     window: int = DEFAULT_WINDOW,
+     config_path: str = "config.yaml",
+     log_path: str = DEFAULT_LOG_PATH,
+     seed_dir: str = "experiments/seed_studies",
+ ) -> dict:
+     """Run failure postmortem analysis.
+ 
+     Args:
+         window: Number of recent experiments to analyze.
+         config_path: Path to config.yaml.
+         log_path: Path to experiment log.
+         seed_dir: Path to seed study directory.
+ 
+     Returns:
+         Postmortem report with diagnosis, evidence, and recommendations.
+     """
+     config = load_config(config_path)
+     eval_cfg = config.get("evaluation", {})
+     primary_metric = eval_cfg.get("primary_metric", "accuracy")
+     lower_is_better = eval_cfg.get("lower_is_better", False)
+ 
+     experiments = load_experiments(log_path)
+ 
+     if not experiments:
+         return {"error": "No experiments found"}
+ 
+     # Use last N experiments
+     recent = experiments[-window:]
+ 
+     streak_info = detect_failure_streak(recent, primary_metric, lower_is_better)
+     streak_exps = streak_info["streak_experiments"]
+     best_metric = streak_info["best_metric"]
+     streak_len = streak_info["streak_length"]
+ 
+     if streak_len < 2:
+         return {
+             "streak_length": streak_len,
+             "message": "No significant failure streak detected",
+             "best_metric": best_metric,
+             "generated_at": datetime.now(timezone.utc).isoformat(),
+         }
+ 
+     # Run all diagnoses
+     diagnoses = {
+         "search_space_exhaustion": diagnose_search_space_exhaustion(streak_exps, primary_metric),
+         "systematic_config_error": diagnose_systematic_config_error(streak_exps, primary_metric, best_metric),
+         "data_issue": diagnose_data_issue(streak_exps, primary_metric),
+         "metric_ceiling": diagnose_metric_ceiling(streak_exps, primary_metric, best_metric),
+         "noise_floor": diagnose_noise_floor(streak_exps, primary_metric, seed_dir),
+     }
+ 
+     # Pick the highest-scoring diagnosis
+     diagnosis_name, diagnosis_data = max(diagnoses.items(), key=lambda d: d[1]["score"])
+ 
+     # Generate recommendations
+     recommendations = _generate_recommendations(diagnosis_name, diagnosis_data, streak_len)
+ 
+     return {
+         "streak_length": streak_len,
+         "window": window,
+         "best_metric": best_metric,
+         "primary_metric": primary_metric,
+         "primary_diagnosis": diagnosis_name,
+         "diagnosis_score": diagnosis_data["score"],
+         "diagnosis_evidence": diagnosis_data["evidence"],
+         "all_diagnoses": {k: {"score": v["score"]} for k, v in diagnoses.items()},
+         "recommendations": recommendations,
+         "generated_at": datetime.now(timezone.utc).isoformat(),
+     }
+ 
+ 
+ def _generate_recommendations(diagnosis: str, data: dict, streak_len: int) -> list[str]:
+     """Generate actionable recommendations based on diagnosis."""
+     recs = {
+         "search_space_exhaustion": [
+             "Stop tuning hyperparameters — switch to `/turing:feature` for feature engineering",
+             "Try `/turing:ensemble` — combine existing models instead of building new ones",
+             "Run `/turing:scale --axis data` — check if more data would help",
+         ],
+         "systematic_config_error": [
+             "Run `/turing:sensitivity` — identify which params actually matter",
+             "Check the common config values against sensitivity analysis",
+             "Try resetting to the best experiment's config and varying one param at a time",
+         ],
+         "data_issue": [
+             "Run `/turing:leak` — check for data leakage masking real performance",
+             "Run `/turing:sanity` — verify data pipeline integrity",
+             "Inspect the raw data for quality issues or distribution shift",
+         ],
+         "metric_ceiling": [
+             "Run `/turing:scale` to confirm you've hit the ceiling",
+             "Consider shifting to a different metric or task formulation",
+             "Try ensemble methods for marginal gains: `/turing:ensemble`",
+         ],
+         "noise_floor": [
+             "Run `/turing:seed` with more seeds to measure true variance",
+             "Increase n_runs for each experiment to reduce noise",
+             "Consider whether the current metric resolution is sufficient",
+         ],
+     }
+     return recs.get(diagnosis, [f"Investigate the last {streak_len} experiments manually"])
+ 
+ 
+ # --- Report Formatting ---
+ 
+ 
+ def save_postmortem_report(report: dict, output_dir: str = "experiments/postmortems") -> Path:
+     """Save postmortem report to YAML."""
+     out_path = Path(output_dir)
+     out_path.mkdir(parents=True, exist_ok=True)
+     ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+     filepath = out_path / f"postmortem-{ts}.yaml"
+     with open(filepath, "w") as f:
+         yaml.dump(report, f, default_flow_style=False, sort_keys=False)
+     return filepath
+ 
+ 
+ def format_postmortem_report(report: dict) -> str:
+     """Format postmortem report as readable markdown."""
+     if "error" in report:
+         return f"ERROR: {report['error']}"
+ 
+     if "message" in report:
+         return f"No failure streak: {report['message']} (best {report.get('best_metric', 'N/A')})"
+ 
+     lines = [
+         f"# Failure Postmortem (last {report.get('streak_length', '?')} experiments, 0 improvements)",
+         "",
+         f"**Diagnosis:** {report.get('primary_diagnosis', 'unknown').upper().replace('_', ' ')}",
+         f"**Confidence:** {report.get('diagnosis_score', 0):.0%}",
+         "",
+         "## Evidence",
+         "",
+     ]
+ 
+     for e in report.get("diagnosis_evidence", []):
+         lines.append(f"- {e}")
+ 
+     lines.extend(["", "## All Diagnoses", ""])
+     for name, data in report.get("all_diagnoses", {}).items():
+         score = data.get("score", 0)
+         marker = "◀" if name == report.get("primary_diagnosis") else ""
+         lines.append(f"- {name.replace('_', ' ')}: {score:.0%} {marker}")
+ 
+     lines.extend(["", "## Recommended Actions", ""])
+     for i, rec in enumerate(report.get("recommendations", []), 1):
+         lines.append(f"{i}. {rec}")
+ 
+     lines.extend(["", f"*Generated: {report.get('generated_at', 'N/A')}*"])
+     return "\n".join(lines)
+ 
+ 
+ # --- CLI ---
+ 
+ 
+ def main():
+     parser = argparse.ArgumentParser(
+         description="Failure postmortem — diagnose why experiments stopped improving"
+     )
+     parser.add_argument("--window", type=int, default=DEFAULT_WINDOW,
+                         help="Number of recent experiments to analyze")
+     parser.add_argument("--auto-trigger", type=int, default=DEFAULT_AUTO_TRIGGER,
+                         help="Minimum streak length required to save a postmortem report")
+     parser.add_argument("--config", default="config.yaml", help="Path to config.yaml")
+     parser.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
+     parser.add_argument("--json", action="store_true", help="Output raw JSON")
+ 
+     args = parser.parse_args()
+ 
+     report = run_postmortem(
+         window=args.window,
+         config_path=args.config,
+         log_path=args.log,
+     )
+ 
+     if args.json:
+         print(json.dumps(report, indent=2))
+     else:
+         print(format_postmortem_report(report))
+ 
+     # Persist the report only once the streak reaches the auto-trigger threshold
+     if ("error" not in report and "message" not in report
+             and report.get("streak_length", 0) >= args.auto_trigger):
+         saved = save_postmortem_report(report)
+         if not args.json:
+             print(f"\nSaved: {saved}")
+ 
+ 
+ if __name__ == "__main__":
+     main()
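
Beyond the CLI, the same pipeline can be driven from Python. A minimal sketch, assuming the module is importable as `scripts.failure_postmortem` (consistent with the `scripts.turing_io` import above) and that an experiment log exists at the default path:

```python
# Sketch: programmatic postmortem. Assumes scripts/ is an importable package
# and experiments/log.jsonl exists; both are assumptions, not package docs.
from scripts.failure_postmortem import (
    format_postmortem_report,
    run_postmortem,
    save_postmortem_report,
)

report = run_postmortem(window=10)
if "error" not in report and "message" not in report:
    print(format_postmortem_report(report))  # markdown summary
    path = save_postmortem_report(report)    # writes postmortem-<UTC ts>.yaml
    print(f"Saved: {path}")
```
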
@@ -436,6 +436,38 @@ def load_update_history(update_dir: str = "experiments/updates") -> list[dict]:
      return results
 
 
+ def load_postmortem_result(postmortem_dir: str = "experiments/postmortems") -> dict | None:
+     """Load the most recent postmortem result."""
+     path = Path(postmortem_dir)
+     if not path.exists():
+         return None
+     files = sorted(path.glob("postmortem-*.yaml"))
+     if not files:
+         return None
+     try:
+         with open(files[-1]) as f:
+             data = yaml.safe_load(f)
+         return data if isinstance(data, dict) else None
+     except (yaml.YAMLError, OSError):
+         return None
+ 
+ 
+ def load_research_plan(plan_dir: str = "experiments/plans") -> dict | None:
+     """Load the most recent research plan."""
+     path = Path(plan_dir)
+     if not path.exists():
+         return None
+     files = sorted(path.glob("plan-*.yaml"))
+     if not files:
+         return None
+     try:
+         with open(files[-1]) as f:
+             data = yaml.safe_load(f)
+         return data if isinstance(data, dict) else None
+     except (yaml.YAMLError, OSError):
+         return None
+ 
+ 
  def format_brief(
      campaign: dict,
      best: dict | None,
@@ -462,6 +494,8 @@ def format_brief(
      simulation_result: dict | None = None,
      registry_summary: dict | None = None,
      update_history: list[dict] | None = None,
+     postmortem_result: dict | None = None,
+     research_plan: dict | None = None,
  ) -> str:
      """Format the research briefing as markdown."""
      direction = "lower" if lower_is_better else "higher"
@@ -815,6 +849,29 @@ def format_brief(
          lines.append(f"- {exp_id}: {strategy} — {verdict}")
      lines.append("")
 
+     # Operational Intelligence section
+     if postmortem_result or research_plan:
+         lines.extend(["", "## Operational Intelligence", ""])
+ 
+         if postmortem_result and "primary_diagnosis" in postmortem_result:
+             diagnosis = postmortem_result["primary_diagnosis"].replace("_", " ").title()
+             streak = postmortem_result.get("streak_length", "?")
+             score = postmortem_result.get("diagnosis_score", 0)
+             lines.append(f"**Failure postmortem:** {diagnosis} ({score:.0%} confidence, {streak} experiment streak)")
+             recs = postmortem_result.get("recommendations", [])
+             if recs:
+                 lines.append(f" Action: {recs[0]}")
+             lines.append("")
+ 
+         if research_plan and "plan" in research_plan:
+             plan = research_plan["plan"]
+             n = plan.get("total_experiments", 0)
+             gain = plan.get("expected_gain", 0)
+             lines.append(f"**Active research plan:** {n} experiments planned (+{gain} expected gain)")
+             for phase in plan.get("phases", [])[:3]:
+                 lines.append(f" - {phase['label']}: {phase['n_experiments']} experiments")
+             lines.append("")
+ 
      lines.extend([
          "",
          "## Recommendations",
@@ -889,6 +946,8 @@ def generate_brief(
      simulation_result = load_simulation_results()
      registry_summary = load_registry_summary()
      update_history = load_update_history()
+     postmortem_result = load_postmortem_result()
+     research_plan = load_research_plan()
 
      return format_brief(
          campaign, best, trajectory, model_types, hypotheses,
@@ -909,6 +968,8 @@ def generate_brief(
          simulation_result=simulation_result,
          registry_summary=registry_summary,
          update_history=update_history if update_history else None,
+         postmortem_result=postmortem_result,
+         research_plan=research_plan,
      )
 
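
The field accesses in `format_brief`'s Operational Intelligence section pin down the dict shapes these loaders must return. A hypothetical example of each, with field names taken from the code above and every value invented purely for illustration:

```python
# Hypothetical inputs for format_brief's Operational Intelligence section.
# Keys mirror the accesses in the diff above; the values are made up.
postmortem_result = {
    "primary_diagnosis": "search_space_exhaustion",
    "diagnosis_score": 0.7,
    "streak_length": 6,
    "recommendations": ["Stop tuning hyperparameters ..."],
}

research_plan = {
    "plan": {
        "total_experiments": 12,
        "expected_gain": 0.015,
        "phases": [
            {"label": "baseline sweep", "n_experiments": 4},
            {"label": "feature ablation", "n_experiments": 5},
            {"label": "ensembling", "n_experiments": 3},
        ],
    }
}
```
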