psystack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. psystack/__init__.py +3 -0
  2. psystack/__main__.py +5 -0
  3. psystack/adapters/__init__.py +0 -0
  4. psystack/adapters/f1/__init__.py +0 -0
  5. psystack/adapters/f1/controllers.py +56 -0
  6. psystack/adapters/f1/degrade.py +31 -0
  7. psystack/adapters/f1/env.py +48 -0
  8. psystack/adapters/f1/factory.py +182 -0
  9. psystack/adapters/f1/live_viewer.py +143 -0
  10. psystack/adapters/f1/planner.py +39 -0
  11. psystack/adapters/f1/signals.py +353 -0
  12. psystack/adapters/f1/world_model.py +75 -0
  13. psystack/adapters/registry.py +35 -0
  14. psystack/cli/__init__.py +0 -0
  15. psystack/cli/app.py +21 -0
  16. psystack/cli/version_check.py +32 -0
  17. psystack/cli/wizard/__init__.py +3 -0
  18. psystack/cli/wizard/discovery.py +65 -0
  19. psystack/cli/wizard/models.py +38 -0
  20. psystack/cli/wizard/questions.py +174 -0
  21. psystack/cli/wizard/review.py +54 -0
  22. psystack/cli/wizard/service.py +181 -0
  23. psystack/core/__init__.py +0 -0
  24. psystack/core/config.py +77 -0
  25. psystack/core/contracts.py +124 -0
  26. psystack/core/signal_schema.py +54 -0
  27. psystack/evaluation/__init__.py +0 -0
  28. psystack/evaluation/metrics/__init__.py +22 -0
  29. psystack/evaluation/metrics/offtrack.py +30 -0
  30. psystack/evaluation/metrics/prediction_error.py +71 -0
  31. psystack/evaluation/metrics/progress.py +22 -0
  32. psystack/evaluation/metrics/reward.py +22 -0
  33. psystack/evaluation/metrics/survival.py +22 -0
  34. psystack/models/__init__.py +42 -0
  35. psystack/models/case.py +30 -0
  36. psystack/models/comparison.py +30 -0
  37. psystack/models/episode.py +82 -0
  38. psystack/models/evaluation_result.py +51 -0
  39. psystack/models/event.py +40 -0
  40. psystack/models/evidence.py +18 -0
  41. psystack/models/explanation.py +23 -0
  42. psystack/models/isolation.py +35 -0
  43. psystack/models/manifest.py +24 -0
  44. psystack/models/metric.py +14 -0
  45. psystack/models/project.py +25 -0
  46. psystack/models/run.py +50 -0
  47. psystack/models/signal.py +14 -0
  48. psystack/models/swap.py +25 -0
  49. psystack/pipeline/__init__.py +0 -0
  50. psystack/pipeline/case_io.py +22 -0
  51. psystack/pipeline/compare/__init__.py +4 -0
  52. psystack/pipeline/compare/decision.py +20 -0
  53. psystack/pipeline/compare/execution.py +50 -0
  54. psystack/pipeline/compare/service.py +95 -0
  55. psystack/pipeline/compare/stats.py +60 -0
  56. psystack/pipeline/compare_module.py +259 -0
  57. psystack/pipeline/context.py +194 -0
  58. psystack/pipeline/episodes.py +109 -0
  59. psystack/pipeline/event_extraction.py +253 -0
  60. psystack/pipeline/events/__init__.py +6 -0
  61. psystack/pipeline/events/config.py +41 -0
  62. psystack/pipeline/events/detection.py +231 -0
  63. psystack/pipeline/events/divergence.py +106 -0
  64. psystack/pipeline/isolation/__init__.py +4 -0
  65. psystack/pipeline/isolation/attribution.py +187 -0
  66. psystack/pipeline/isolation/designs.py +35 -0
  67. psystack/pipeline/isolation/executor.py +60 -0
  68. psystack/pipeline/isolation/planner.py +10 -0
  69. psystack/pipeline/live_update.py +59 -0
  70. psystack/pipeline/metrics_util.py +65 -0
  71. psystack/pipeline/paired_runner.py +185 -0
  72. psystack/pipeline/runner.py +107 -0
  73. psystack/pipeline/stages/__init__.py +22 -0
  74. psystack/pipeline/stages/attribute.py +78 -0
  75. psystack/pipeline/stages/base.py +18 -0
  76. psystack/pipeline/stages/compare.py +37 -0
  77. psystack/pipeline/stages/events.py +53 -0
  78. psystack/pipeline/stages/isolate.py +88 -0
  79. psystack/pipeline/stages/report.py +59 -0
  80. psystack/pipeline/staleness.py +33 -0
  81. psystack/pipeline/state.py +31 -0
  82. psystack/pipeline/workspace.py +177 -0
  83. psystack/reporting/__init__.py +0 -0
  84. psystack/reporting/bundle.py +74 -0
  85. psystack/reporting/evidence.py +28 -0
  86. psystack/reporting/renderers/__init__.py +0 -0
  87. psystack/reporting/renderers/console.py +27 -0
  88. psystack/reporting/renderers/html.py +28 -0
  89. psystack/reporting/renderers/json.py +13 -0
  90. psystack/reporting/templates/investigation_report.html.j2 +85 -0
  91. psystack/reporting/templates/report.html.j2 +99 -0
  92. psystack/reporting/types.py +33 -0
  93. psystack/tui/__init__.py +0 -0
  94. psystack/tui/actions.py +78 -0
  95. psystack/tui/app.py +1188 -0
  96. psystack/tui/detection.py +241 -0
  97. psystack/tui/screens/__init__.py +1 -0
  98. psystack/tui/screens/attribution.py +252 -0
  99. psystack/tui/screens/case_history.py +131 -0
  100. psystack/tui/screens/case_verdict.py +657 -0
  101. psystack/tui/screens/command_palette.py +70 -0
  102. psystack/tui/screens/drawers/__init__.py +1 -0
  103. psystack/tui/screens/drawers/context_drawer.py +90 -0
  104. psystack/tui/screens/drawers/evidence_drawer.py +113 -0
  105. psystack/tui/screens/error_modal.py +54 -0
  106. psystack/tui/screens/investigation.py +686 -0
  107. psystack/tui/screens/run_builder.py +492 -0
  108. psystack/tui/screens/workspace_picker.py +69 -0
  109. psystack/tui/services.py +769 -0
  110. psystack/tui/state.py +137 -0
  111. psystack/tui/styles/app.tcss +224 -0
  112. psystack/tui/views/__init__.py +0 -0
  113. psystack/tui/widgets/__init__.py +0 -0
  114. psystack/tui/widgets/action_bar.py +42 -0
  115. psystack/tui/widgets/artifact_list.py +38 -0
  116. psystack/tui/widgets/artifact_preview.py +34 -0
  117. psystack/tui/widgets/attribution_decision_card.py +55 -0
  118. psystack/tui/widgets/case_bar.py +108 -0
  119. psystack/tui/widgets/causal_sequence.py +73 -0
  120. psystack/tui/widgets/comparability_summary.py +48 -0
  121. psystack/tui/widgets/context_rail.py +69 -0
  122. psystack/tui/widgets/effect_table.py +32 -0
  123. psystack/tui/widgets/event_navigator.py +176 -0
  124. psystack/tui/widgets/explanation_card.py +67 -0
  125. psystack/tui/widgets/falsifier_list.py +73 -0
  126. psystack/tui/widgets/focus_signals_strip.py +22 -0
  127. psystack/tui/widgets/help_overlay.py +85 -0
  128. psystack/tui/widgets/isolation_case_detail.py +67 -0
  129. psystack/tui/widgets/isolation_case_table.py +50 -0
  130. psystack/tui/widgets/live_run_monitor.py +337 -0
  131. psystack/tui/widgets/metric_detail.py +93 -0
  132. psystack/tui/widgets/metric_table.py +71 -0
  133. psystack/tui/widgets/progress_summary.py +300 -0
  134. psystack/tui/widgets/run_config_panel.py +163 -0
  135. psystack/tui/widgets/run_monitor.py +91 -0
  136. psystack/tui/widgets/section_title.py +15 -0
  137. psystack/tui/widgets/signal_timeline.py +206 -0
  138. psystack/tui/widgets/status_badge.py +52 -0
  139. psystack/tui/widgets/step_inspector.py +105 -0
  140. psystack/tui/widgets/tier_indicator.py +44 -0
  141. psystack/tui/widgets/track_map.py +137 -0
  142. psystack/tui/widgets/transport_bar.py +152 -0
  143. psystack/tui/widgets/verdict_strip.py +103 -0
  144. psystack-0.1.0.dist-info/METADATA +42 -0
  145. psystack-0.1.0.dist-info/RECORD +149 -0
  146. psystack-0.1.0.dist-info/WHEEL +5 -0
  147. psystack-0.1.0.dist-info/entry_points.txt +5 -0
  148. psystack-0.1.0.dist-info/licenses/LICENSE +21 -0
  149. psystack-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,65 @@
1
+ """Shared metric computation logic used by both compare and isolation pipelines."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from typing import Any
7
+
8
+ from psystack.core.contracts import AdapterFactory
9
+ from psystack.models import MetricResult
10
+
11
+
12
def compute_and_filter_metrics(
    episodes: list[dict[str, Any]],
    factory: AdapterFactory,
    world_model: Any = None,
    env: Any = None,
    seed: int = 42,
    skip_metrics: set[str] | None = None,
) -> list[MetricResult]:
    """Compute all metrics on episodes, filtering NaN values and optionally skipping metrics.

    Args:
        episodes: Episode data dicts. Temporarily mutated: ``_world_model``,
            ``_env`` and ``_seed`` keys are injected for the duration of the
            call when both ``world_model`` and ``env`` are provided.
        factory: Adapter factory providing metric plugins.
        world_model: Optional world model for the prediction_error metric.
        env: Optional env for the prediction_error metric.
        seed: Random seed passed through to metrics via the episode dicts.
        skip_metrics: Set of metric IDs to skip (e.g. {"prediction_error"}).

    Returns:
        List of MetricResult with NaN values excluded.
    """
    skip = skip_metrics or set()

    # Inject world_model/env so metrics that re-simulate (prediction_error)
    # can reach them through the episode dicts.
    if world_model is not None and env is not None:
        for ep in episodes:
            ep["_world_model"] = world_model
            ep["_env"] = env
            ep["_seed"] = seed

    results: list[MetricResult] = []
    try:
        for metric in factory.get_metrics():
            if metric.metric_id() in skip:
                continue
            raw = metric.compute(episodes)
            val = raw["primary_value"]
            # NaN signals "not computable for these episodes" — drop it rather
            # than propagate a poisoned value into downstream comparisons.
            if math.isnan(val):
                continue
            results.append(MetricResult(
                metric_id=metric.metric_id(),
                value=val,
                unit=raw.get("unit"),
                higher_is_better=metric.higher_is_better(),
                per_episode=raw.get("per_episode", []),
                breakdown=raw.get("breakdown", {}),
            ))
    finally:
        # Always strip the injected refs — even when a metric raises — so the
        # caller's episode dicts stay serializable and side-effect free.
        for ep in episodes:
            ep.pop("_world_model", None)
            ep.pop("_env", None)
            ep.pop("_seed", None)

    return results
@@ -0,0 +1,185 @@
1
+ """Paired episode runner — lockstep A/B execution with pair-aware telemetry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ from collections.abc import Callable
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import torch
11
+
12
+ from psystack.core.contracts import EnvPlugin, PlannerPlugin
13
+ from psystack.pipeline.episodes import _serialize_info
14
+ from psystack.pipeline.live_update import LivePairFrame, LiveStepUpdate
15
+
16
+
17
class EvalCancelled(Exception):
    """Signals that an in-flight evaluation was aborted at the user's request."""
19
+
20
+
21
def run_paired_episodes(
    env_a: EnvPlugin,
    env_b: EnvPlugin,
    planner_a: PlannerPlugin,
    planner_b: PlannerPlugin,
    num_episodes: int,
    seed: int = 42,
    pair_callback: Callable[[LivePairFrame], None] | None = None,
    max_steps: int = 0,
    cancel_event: threading.Event | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Run N episode pairs in lockstep.

    Both envs are stepped once per tick. One LivePairFrame emitted per tick.
    If one side finishes early, its update freezes (the last LiveStepUpdate
    for that side keeps being re-sent unchanged in later frames).

    Args:
        env_a / env_b: Environments for the two conditions.
        planner_a / planner_b: Planners driving each environment.
        num_episodes: Number of episode pairs to run.
        seed: Base seed; episode i seeds numpy/torch/env with ``seed + i``.
        pair_callback: Optional callback invoked once per tick with a LivePairFrame.
        max_steps: Forwarded into each LivePairFrame; not enforced here.
        cancel_event: Polled before each side's step; when set, EvalCancelled
            is raised immediately.

    Raises:
        EvalCancelled: If *cancel_event* is set while an episode is running.

    Returns (episodes_a, episodes_b) in the same format as run_episodes().
    """
    all_episodes_a: list[dict[str, Any]] = []
    all_episodes_b: list[dict[str, Any]] = []

    for ep_idx in range(num_episodes):
        ep_seed = seed + ep_idx

        # Deterministic seeding — both sides start each episode from
        # identical numpy/torch RNG state.
        np.random.seed(ep_seed)
        torch.manual_seed(ep_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(ep_seed)

        planner_a.reset()
        planner_b.reset()
        obs_a = env_a.reset(seed=ep_seed)
        obs_b = env_b.reset(seed=ep_seed)

        steps_a: list[dict[str, Any]] = []
        steps_b: list[dict[str, Any]] = []
        total_reward_a = 0.0
        total_reward_b = 0.0
        done_a = False
        done_b = False
        # Default termination reason; overwritten below if the env reports
        # a "termination" key in info on its final step.
        termination_a = "max_steps"
        termination_b = "max_steps"
        info_a: dict[str, Any] = {}
        info_b: dict[str, Any] = {}

        # Frozen terminal state for the side that finishes first
        last_update_a: LiveStepUpdate | None = None
        last_update_b: LiveStepUpdate | None = None
        # Track last known progress — env may reset to 0 after done=True
        last_progress_a = 0.0
        last_progress_b = 0.0

        tick = 0
        while not (done_a and done_b):
            # Check for cancellation before each side's step so a cancel
            # takes effect within one half-tick.
            if cancel_event is not None and cancel_event.is_set():
                raise EvalCancelled("Evaluation cancelled by user")
            # Step A
            if not done_a:
                # Snapshot state/progress BEFORE stepping so the recorded
                # values correspond to the observation the planner acted on.
                car_state_a = env_a.get_car_state()
                progress_a = env_a.get_progress()
                action_a = planner_a.act(obs_a, car_state=car_state_a)
                obs_next_a, reward_a, done_a, info_a = env_a.step(action_a)
                last_progress_a = float(progress_a)

                action_list_a = action_a.tolist() if hasattr(action_a, "tolist") else list(action_a)
                steps_a.append({
                    "obs": {"aux": obs_a["aux"].tolist() if hasattr(obs_a["aux"], "tolist") else obs_a["aux"]},
                    "action": action_list_a,
                    "reward": float(reward_a),
                    "done": done_a,
                    "info": _serialize_info(info_a),
                    "car_state": car_state_a,
                    "track_progress": float(progress_a),
                })
                total_reward_a += reward_a
                obs_a = obs_next_a

                if done_a and "termination" in info_a:
                    termination_a = info_a["termination"]

                last_update_a = LiveStepUpdate(
                    run_id="a", episode_idx=ep_idx, episode_total=num_episodes,
                    step=tick, progress=float(progress_a), reward=float(reward_a),
                    done=done_a,
                    termination=info_a.get("termination") if done_a else None,
                    state=car_state_a, action=action_list_a, info=_serialize_info(info_a),
                )

            if cancel_event is not None and cancel_event.is_set():
                raise EvalCancelled("Evaluation cancelled by user")
            # Step B — mirrors Step A exactly.
            if not done_b:
                car_state_b = env_b.get_car_state()
                progress_b = env_b.get_progress()
                action_b = planner_b.act(obs_b, car_state=car_state_b)
                obs_next_b, reward_b, done_b, info_b = env_b.step(action_b)
                last_progress_b = float(progress_b)

                action_list_b = action_b.tolist() if hasattr(action_b, "tolist") else list(action_b)
                steps_b.append({
                    "obs": {"aux": obs_b["aux"].tolist() if hasattr(obs_b["aux"], "tolist") else obs_b["aux"]},
                    "action": action_list_b,
                    "reward": float(reward_b),
                    "done": done_b,
                    "info": _serialize_info(info_b),
                    "car_state": car_state_b,
                    "track_progress": float(progress_b),
                })
                total_reward_b += reward_b
                obs_b = obs_next_b

                if done_b and "termination" in info_b:
                    termination_b = info_b["termination"]

                last_update_b = LiveStepUpdate(
                    run_id="b", episode_idx=ep_idx, episode_total=num_episodes,
                    step=tick, progress=float(progress_b), reward=float(reward_b),
                    done=done_b,
                    termination=info_b.get("termination") if done_b else None,
                    state=car_state_b, action=action_list_b, info=_serialize_info(info_b),
                )

            # Emit pair frame — a finished side re-sends its frozen last update.
            if pair_callback is not None:
                frame = LivePairFrame(
                    episode_idx=ep_idx,
                    episode_total=num_episodes,
                    tick=tick,
                    a=last_update_a,
                    b=last_update_b,
                    both_done=done_a and done_b,
                    max_steps=max_steps,
                )
                pair_callback(frame)

            tick += 1

        # Build episode records — use last tracked progress, not env.get_progress()
        # which may return 0 if the env auto-resets on done=True
        final_progress_a = last_progress_a
        final_progress_b = last_progress_b

        all_episodes_a.append({
            "episode_id": f"ep_{ep_idx:04d}",
            "steps": steps_a,
            "total_steps": len(steps_a),
            "final_track_progress": float(final_progress_a),
            "total_reward": float(total_reward_a),
            "termination": termination_a,
            "fastest_lap_time": info_a.get("fastest_lap_time"),
            "lap_count": info_a.get("lap_count", 0),
        })
        all_episodes_b.append({
            "episode_id": f"ep_{ep_idx:04d}",
            "steps": steps_b,
            "total_steps": len(steps_b),
            "final_track_progress": float(final_progress_b),
            "total_reward": float(total_reward_b),
            "termination": termination_b,
            "fastest_lap_time": info_b.get("fastest_lap_time"),
            "lap_count": info_b.get("lap_count", 0),
        })

    return all_episodes_a, all_episodes_b
@@ -0,0 +1,107 @@
1
+ """Stage runner — orchestrates pipeline stages with progress display."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ from datetime import datetime, timezone
7
+ from typing import Protocol, runtime_checkable
8
+
9
+ from psystack.pipeline.context import RunContext
10
+ from psystack.pipeline.stages.base import Stage
11
+ from psystack.pipeline.state import StageResult
12
+
13
+
14
@runtime_checkable
class StageObserver(Protocol):
    """Structural interface for receiving stage lifecycle notifications.

    run_stages() calls these hooks synchronously as each stage starts,
    completes, fails, is skipped, or has its previous output reused.
    """

    def on_stage_start(self, name: str) -> None: ...
    def on_stage_complete(self, name: str, result: StageResult) -> None: ...
    def on_stage_fail(self, name: str, error: str) -> None: ...
    def on_stage_skip(self, name: str, reason: str) -> None: ...
    def on_stage_reuse(self, name: str) -> None: ...
21
+
22
+
23
+ class _NullObserver:
24
+ """Default no-op observer for CLI usage."""
25
+ def on_stage_start(self, name: str) -> None: pass
26
+ def on_stage_complete(self, name: str, result: StageResult) -> None: pass
27
+ def on_stage_fail(self, name: str, error: str) -> None: pass
28
+ def on_stage_skip(self, name: str, reason: str) -> None: pass
29
+ def on_stage_reuse(self, name: str) -> None: pass
30
+
31
+
32
def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 timestamp string."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
34
+
35
+
36
def run_stages(
    ctx: RunContext,
    stages: tuple[Stage, ...],
    *,
    observer: StageObserver | None = None,
) -> None:
    """Execute *stages* in order against *ctx*.

    Per stage, in order of precedence: (1) honor an explicit skip in
    ``ctx.skip``; (2) skip stages outside the selected range; (3) verify all
    prerequisite stages completed; (4) when ``ctx.resume`` is set, reuse a
    stage whose output is already up to date; (5) otherwise run it.
    State is persisted (``ctx.save_state()``) after every transition, and the
    first stage exception is recorded as failed and re-raised (fail fast).

    Args:
        ctx: Run context holding skip set, resume flag, and state persistence.
        stages: Ordered stages to execute.
        observer: Optional lifecycle observer. When provided, the Rich
            progress bar is suppressed (the observer is assumed to render
            progress itself, e.g. the TUI).
    """
    obs = observer or _NullObserver()
    all_names = [stage.name for stage in stages]

    # Only use Rich Progress when no observer (CLI mode)
    if observer is None:
        from rich.progress import Progress
        progress_ctx = Progress()
    else:
        # nullcontext() yields None, so `progress is not None` gates all
        # progress-bar calls below.
        progress_ctx = contextlib.nullcontext()

    with progress_ctx as progress:
        if progress is not None:
            task_id = progress.add_task("PsyStack pipeline", total=len(stages))

        for stage in stages:
            # 1. Explicit skip
            if stage.name in ctx.skip:
                reason = "explicitly skipped"
                ctx.mark_skipped(stage.name, reason=reason)
                ctx.save_state()
                obs.on_stage_skip(stage.name, reason)
                if progress is not None:
                    progress.advance(task_id)
                continue

            # 2. Outside selected range — preserves existing completed state
            if not ctx.stage_selected(stage.name, all_names):
                reason = "outside selected range"
                ctx.mark_skipped(stage.name, reason=reason)
                ctx.save_state()
                obs.on_stage_skip(stage.name, reason)
                if progress is not None:
                    progress.advance(task_id)
                continue

            # 3. Dependency check
            if not ctx.prereqs_satisfied(stage.requires):
                raise RuntimeError(
                    f"Stage '{stage.name}' requires completed stages: {stage.requires}"
                )

            # 4. Resume: reuse if up-to-date — keeps completed status
            if ctx.resume and stage.is_up_to_date(ctx):
                ctx.mark_reused(stage.name)
                ctx.save_state()
                obs.on_stage_reuse(stage.name)
                if progress is not None:
                    progress.advance(task_id)
                continue

            # 5. Run — fail fast on exception
            try:
                ctx.mark_running(stage.name, started_at=utc_now())
                obs.on_stage_start(stage.name)
                result = stage.run(ctx)
                ctx.mark_completed(stage.name, result=result, finished_at=utc_now())
                obs.on_stage_complete(stage.name, result)
            except Exception as exc:
                # Persist the failure before re-raising so a later --resume
                # sees this stage as failed, not running.
                ctx.mark_failed(stage.name, error=str(exc), finished_at=utc_now())
                ctx.save_state()
                obs.on_stage_fail(stage.name, str(exc))
                raise
            else:
                ctx.save_state()
                if progress is not None:
                    progress.advance(task_id)
@@ -0,0 +1,22 @@
1
+ from psystack.pipeline.stages.attribute import AttributeStage
2
+ from psystack.pipeline.stages.compare import CompareStage
3
+ from psystack.pipeline.stages.events import EventStage
4
+ from psystack.pipeline.stages.isolate import IsolateStage
5
+ from psystack.pipeline.stages.report import ReportStage
6
+
7
# Canonical stage ordering: compare → events → isolate → attribute → report.
# Each stage also declares its own prerequisites via its `requires` attribute,
# which the runner checks independently of this ordering.
DEFAULT_PIPELINE = (
    CompareStage(),
    EventStage(),
    IsolateStage(),
    AttributeStage(),
    ReportStage(),
)

# Public API of the stages subpackage.
__all__ = [
    "CompareStage",
    "EventStage",
    "IsolateStage",
    "AttributeStage",
    "ReportStage",
    "DEFAULT_PIPELINE",
]
@@ -0,0 +1,78 @@
1
+ """AttributeStage — computes attribution from compare metrics + isolate swap results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ from pydantic import TypeAdapter
8
+
9
+ from psystack.models.comparison import ComparisonReport
10
+ from psystack.models.isolation import AttributionTable, IsolationResultBundle
11
+ from psystack.pipeline.context import RunContext
12
+ from psystack.pipeline.state import StageResult
13
+
14
+ _ATTR_TABLES = TypeAdapter(list[AttributionTable])
15
+
16
+
17
def _attribute_output_is_valid(ctx: RunContext) -> bool:
    """Return True when the attribute output file exists and parses as list[AttributionTable]."""
    path = ctx.output_path("attribute")
    if not path.exists():
        return False
    try:
        _ATTR_TABLES.validate_json(path.read_text())
    except Exception:
        # Any read/parse/validation failure means the output must be rebuilt.
        return False
    return True
26
+
27
+
28
class AttributeStage:
    """Pipeline stage: attribute each regressed metric to swap-test factors.

    Consumes the compare stage's report and the isolate stage's result
    bundle; produces a JSON list of AttributionTable at the "attribute"
    output path.
    """

    name = "attribute"
    requires = ("compare", "isolate")

    def is_up_to_date(self, ctx: RunContext) -> bool:
        """Output is reusable only if it parses as list[AttributionTable]."""
        return _attribute_output_is_valid(ctx)

    def run(self, ctx: RunContext) -> StageResult:
        """Compute one AttributionTable per regressed metric and persist them."""
        # Local import — NOTE(review): presumably deferred to keep attribution
        # dependencies off the module import path; confirm before inlining.
        from psystack.pipeline.isolation.attribution import compute_attribution

        report = ComparisonReport.model_validate_json(
            (ctx.stage_output_dir / "compare_report.json").read_text()
        )
        regressions = [m for m in report.metrics if m.status == "regression"]

        # Fast path: nothing regressed, so write an empty (but valid) table list.
        if not regressions:
            output = ctx.output_path("attribute")
            output.write_text("[]")
            return StageResult(
                primary_output=str(output),
                output_paths=[str(output)],
                summary="No regressions — no attributions computed",
                metadata={"num_attributions": 0, "skipped_reason": "no_regressions"},
            )

        # Read structured isolate bundle
        bundle = IsolationResultBundle.model_validate_json(
            ctx.output_path("isolate").read_text()
        )
        swap_results = bundle.swap_results

        tables: list[AttributionTable] = []
        warnings: list[str] = []

        for metric in regressions:
            table = compute_attribution(metric, swap_results)
            tables.append(table)
            # Surface non-attributable regressions as stage warnings.
            if table.decision == "not_attributable":
                warnings.append(f"{metric.metric_id} not attributable")

        # Typed stage output: list[AttributionTable]
        output = ctx.output_path("attribute")
        output.write_text(json.dumps([t.model_dump() for t in tables], indent=2))

        return StageResult(
            primary_output=str(output),
            output_paths=[str(output)],
            summary=f"{len(tables)} attributions computed",
            warnings=warnings,
            metadata={"num_attributions": len(tables)},
        )
@@ -0,0 +1,18 @@
1
+ """Stage protocol for the pipeline runner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, Protocol
6
+
7
+ from psystack.pipeline.state import StageResult
8
+
9
+ if TYPE_CHECKING:
10
+ from psystack.pipeline.context import RunContext
11
+
12
+
13
class Stage(Protocol):
    """Structural interface every pipeline stage must satisfy.

    Attributes:
        name: Unique stage identifier used in skip sets, range selection,
            and persisted state.
        requires: Names of stages that must have completed before this one runs.
    """

    name: str
    requires: tuple[str, ...]

    # Return True if this stage's existing output can be reused on resume.
    def is_up_to_date(self, ctx: RunContext) -> bool: ...
    # Execute the stage and describe its outputs.
    def run(self, ctx: RunContext) -> StageResult: ...
@@ -0,0 +1,37 @@
1
+ """CompareStage — runs baseline vs candidate comparison."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from psystack.pipeline.compare import compare_manifests
6
+ from psystack.pipeline.context import RunContext
7
+ from psystack.pipeline.state import StageResult
8
+
9
+
10
class CompareStage:
    """Pipeline stage: compare the baseline and candidate manifests.

    First stage of the pipeline (no prerequisites); writes
    compare_report.json into the stage output directory.
    """

    name = "compare"
    requires = ()

    def is_up_to_date(self, ctx: RunContext) -> bool:
        """Reusable when compare_report.json exists (contents are not validated here)."""
        report_path = ctx.stage_output_dir / "compare_report.json"
        return report_path.exists()

    def run(self, ctx: RunContext) -> StageResult:
        """Run the comparison and persist the report; count regressions for the summary."""
        report = compare_manifests(
            baseline_manifest=ctx.baseline_manifest,
            candidate_manifest=ctx.candidate_manifest,
            workspace=ctx.workspace,
            factory=ctx.factory,
            # Statistical knobs come from run settings.
            n_resamples=ctx.settings.run.bootstrap_resamples,
            alpha=ctx.settings.run.alpha,
        )

        report_path = ctx.stage_output_dir / "compare_report.json"
        report_path.write_text(report.model_dump_json(indent=2))

        regressions = [m for m in report.metrics if m.status == "regression"]
        return StageResult(
            primary_output=str(report_path),
            output_paths=[str(report_path)],
            summary=f"{len(report.metrics)} metrics compared, {len(regressions)} regressions",
            metadata={"num_metrics": len(report.metrics), "num_regressions": len(regressions)},
        )
@@ -0,0 +1,53 @@
1
+ """EventStage — detect divergence events from compare output episode data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ from psystack.pipeline.context import RunContext
8
+ from psystack.pipeline.events.config import EventDetectionConfig
9
+ from psystack.pipeline.events.detection import detect_events
10
+ from psystack.pipeline.state import StageResult
11
+
12
+
13
class EventStage:
    """Pipeline stage: detect divergence events from both conditions' episode data.

    Reads baseline/candidate episodes.json from the workspace and writes the
    detected events to events.json in the stage output directory.
    """

    name = "events"
    requires = ("compare",)

    def is_up_to_date(self, ctx: RunContext) -> bool:
        """Reusable when events.json already exists (contents are not validated)."""
        events_path = ctx.stage_output_dir / "events.json"
        return events_path.exists()

    def run(self, ctx: RunContext) -> StageResult:
        """Detect events and persist them; summarize detected event types."""
        # Load episode data from compare stage output directory
        baseline_episodes = self._load_episodes(ctx, "baseline")
        candidate_episodes = self._load_episodes(ctx, "candidate")

        # Use the default detection config unless settings carry an `event` section.
        config = EventDetectionConfig()
        settings = ctx.settings
        if hasattr(settings, "event") and settings.event is not None:
            config = EventDetectionConfig.model_validate(
                settings.event.model_dump()
            )

        events = detect_events(baseline_episodes, candidate_episodes, config)

        events_path = ctx.stage_output_dir / "events.json"
        events_data = [e.model_dump() for e in events]
        events_path.write_text(json.dumps(events_data, indent=2))

        # Sort the de-duplicated types so summary and metadata are
        # deterministic across runs — bare set iteration order varies
        # between processes and would make otherwise-identical runs diff.
        unique_types = sorted({e.type for e in events})
        return StageResult(
            primary_output=str(events_path),
            output_paths=[str(events_path)],
            summary=f"{len(events)} events detected: {', '.join(unique_types) or 'none'}",
            metadata={"num_events": len(events), "event_types": unique_types},
        )

    def _load_episodes(self, ctx: RunContext, condition: str) -> list[dict]:
        """Load `<condition>/episodes.json` from the workspace, or [] when absent."""
        episodes_path = ctx.workspace / condition / "episodes.json"
        if episodes_path.exists():
            return json.loads(episodes_path.read_text())
        return []
@@ -0,0 +1,88 @@
1
+ """IsolateStage — runs swap tests from a named isolation design."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from psystack.models import SwapTestSpec
6
+ from psystack.models.comparison import ComparisonReport
7
+ from psystack.models.isolation import IsolationResultBundle
8
+ from psystack.pipeline.context import RunContext
9
+ from psystack.pipeline.state import StageResult
10
+
11
+
12
def _isolate_output_is_valid(ctx: RunContext) -> bool:
    """Return True when the isolate output file exists and parses as an IsolationResultBundle."""
    path = ctx.output_path("isolate")
    if not path.exists():
        return False
    try:
        IsolationResultBundle.model_validate_json(path.read_text())
    except Exception:
        # Any read/parse/validation failure means the output must be rebuilt.
        return False
    return True
21
+
22
+
23
class IsolateStage:
    """Pipeline stage: run swap tests from a named isolation design.

    Consumes the compare stage's report; when regressions exist, executes
    every case in the "screening_v1" plan and persists both per-case results
    (workspace/swap_results/*.json) and a combined IsolationResultBundle at
    the "isolate" output path.
    """

    name = "isolate"
    requires = ("compare",)

    def is_up_to_date(self, ctx: RunContext) -> bool:
        """Output is reusable only if it parses as an IsolationResultBundle."""
        return _isolate_output_is_valid(ctx)

    def run(self, ctx: RunContext) -> StageResult:
        """Execute the isolation plan's swap tests and persist the result bundle."""
        # Local imports — NOTE(review): presumably deferred to keep heavy
        # isolation deps off the module import path; confirm before inlining.
        from psystack.pipeline.isolation import build_isolation_plan
        from psystack.pipeline.isolation.executor import execute_swap_test

        report = ComparisonReport.model_validate_json(
            (ctx.stage_output_dir / "compare_report.json").read_text()
        )
        regressions = [m for m in report.metrics if m.status == "regression"]

        plan = build_isolation_plan("screening_v1")

        # Fast path: nothing regressed — still write a valid (empty) bundle
        # so downstream stages and is_up_to_date() find parseable output.
        if not regressions:
            bundle = IsolationResultBundle(
                design=plan.design,
                cases=plan.cases,
                swap_results=[],
            )
            output = ctx.output_path("isolate")
            output.write_text(bundle.model_dump_json(indent=2))
            return StageResult(
                primary_output=str(output),
                output_paths=[str(output)],
                summary="No regressions — no swap tests run",
                metadata={"num_swap_tests": 0, "skipped_reason": "no_regressions"},
            )

        swap_dir = ctx.workspace / "swap_results"
        swap_dir.mkdir(parents=True, exist_ok=True)

        swap_results = []
        warnings: list[str] = []
        for case in plan.cases:
            spec = SwapTestSpec(test_id=case.test_id, factors=case.factors)
            result = execute_swap_test(
                spec,
                ctx.baseline_manifest,
                ctx.candidate_manifest,
                ctx.factory,
            )
            swap_results.append(result)
            # Persist each case's result individually for inspection/debugging.
            (swap_dir / f"{case.test_id}.json").write_text(result.model_dump_json(indent=2))
            # A failed swap test is a warning, not a stage failure.
            if result.status == "failed":
                warnings.append(f"Swap test {case.test_id} failed: {result.error}")

        bundle = IsolationResultBundle(
            design=plan.design,
            cases=plan.cases,
            swap_results=swap_results,
        )
        output = ctx.output_path("isolate")
        output.write_text(bundle.model_dump_json(indent=2))

        return StageResult(
            primary_output=str(output),
            output_paths=[str(output)] + [str(swap_dir / f"{c.test_id}.json") for c in plan.cases],
            summary=f"{len(swap_results)} swap tests completed",
            warnings=warnings,
            metadata={"num_swap_tests": len(swap_results)},
        )