PyPI - psystack - Versions diffs - 0.1.0__tar.gz → 0.1.1__tar.gz - Mend

psystack 0.1.0.tar.gz → 0.1.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157) hide show

psystack-0.1.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,98 @@
+Metadata-Version: 2.4
+Name: psystack
+Version: 0.1.1
+Summary: Regression investigation harness for ML pipelines
+Author: Danny Nguyen
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/PsyStack/PsyStack
+Project-URL: Issues, https://github.com/PsyStack/PsyStack/issues
+Keywords: regression,investigation,ml,machine-learning,debugging,pipeline
+Classifier: Development Status :: 4 - Beta
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: typer<1,>=0.9.0
+Requires-Dist: pydantic<3,>=2.0.0
+Requires-Dist: pydantic-settings>=2.0.0
+Requires-Dist: jinja2>=3.1.0
+Requires-Dist: tomli-w>=1.0.0
+Requires-Dist: InquirerPy>=0.3.4
+Requires-Dist: rich>=13.0.0
+Requires-Dist: torch<3,>=2.0.0
+Requires-Dist: numpy<3,>=1.24.0
+Requires-Dist: scipy>=1.11.0
+Requires-Dist: statsmodels>=0.14.0
+Requires-Dist: textual>=0.80.0
+Requires-Dist: tomli>=2.0.0; python_version < "3.11"
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-textual-snapshot; extra == "dev"
+Requires-Dist: hypothesis; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
+Requires-Dist: ruff>=0.4.0; extra == "dev"
+Requires-Dist: mypy>=1.8.0; extra == "dev"
+Dynamic: license-file
+ # PsyStack
+ A regression investigation harness for ML pipelines, latent action systems, and world models.
+ PsyStack runs paired A/B evaluations across ML/world model experiments, detects regressions, and provides an interactive investigation workbench for debugging episode-level divergences.
+ ## What it does
+ - Configure A/B experiment pairs with different weights, planners, or configs
+ - Run paired evaluations with live telemetry
+ - Detect regressions via bootstrap significance testing across 5 metrics
+ - Drill into individual episodes with signal timelines and event detection
+ ## Quickstart
+ Python 3.10+ required.
+ ### Install
+```bash
+pip install psystack
+```
+ ### Prerequisites: PsyStack requires an ML repo with a compatible adapter.
+ The only available adapter is for [f1worldmodel](https://github.com/justinsiek/f1worldmodel).
+ Clone the repo and follow its README to install dependencies.
+ ### Run
+cd into the ML repo project root:
+Example:
+```bash
+  cd f1worldmodel
+  psystack
+```
+ This launches the TUI. From there:
+ 1. Select or create a case.
+ 2. Pick a track, configure Run A (baseline) and Run B (candidate) with different checkpoints or planner settings
+ 3. Run the evaluation
+ 4. View the verdict and drill into episodes to investigate divergences
+ ## Available Adapters
+ | Adapter | Repo | Status |
+ |---------|------|--------|
+ | `f1` | [justinsiek/f1worldmodel](https://github.com/justinsiek/f1worldmodel) | Beta |
+ ## Status
+ Beta. APIs may change.
+ ## License
+ MIT

psystack-0.1.1/README.md ADDED Viewed

@@ -0,0 +1,59 @@
+ # PsyStack
+ A regression investigation harness for ML pipelines, latent action systems, and world models.
+ PsyStack runs paired A/B evaluations across ML/world model experiments, detects regressions, and provides an interactive investigation workbench for debugging episode-level divergences.
+ ## What it does
+ - Configure A/B experiment pairs with different weights, planners, or configs
+ - Run paired evaluations with live telemetry
+ - Detect regressions via bootstrap significance testing across 5 metrics
+ - Drill into individual episodes with signal timelines and event detection
+ ## Quickstart
+ Python 3.10+ required.
+ ### Install
+```bash
+pip install psystack
+```
+ ### Prerequisites: PsyStack requires an ML repo with a compatible adapter.
+ The only available adapter is for [f1worldmodel](https://github.com/justinsiek/f1worldmodel).
+ Clone the repo and follow its README to install dependencies.
+ ### Run
+cd into the ML repo project root:
+Example:
+```bash
+  cd f1worldmodel
+  psystack
+```
+ This launches the TUI. From there:
+ 1. Select or create a case.
+ 2. Pick a track, configure Run A (baseline) and Run B (candidate) with different checkpoints or planner settings
+ 3. Run the evaluation
+ 4. View the verdict and drill into episodes to investigate divergences
+ ## Available Adapters
+ | Adapter | Repo | Status |
+ |---------|------|--------|
+ | `f1` | [justinsiek/f1worldmodel](https://github.com/justinsiek/f1worldmodel) | Beta |
+ ## Status
+ Beta. APIs may change.
+ ## License
+ MIT

{psystack-0.1.0 → psystack-0.1.1}/pyproject.toml RENAMED Viewed

@@ -4,16 +4,15 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "psystack"
-version = "0.1.0"
+version = "0.1.1"
 description = "Regression investigation harness for ML pipelines"
 requires-python = ">=3.10"
-license = {text = "MIT"}
+license = "MIT"
 authors = [{name = "Danny Nguyen"}]
 readme = "README.md"
 keywords = ["regression", "investigation", "ml", "machine-learning", "debugging", "pipeline"]
 classifiers = [
     "Development Status :: 4 - Beta",
-    "License :: OSI Approved :: MIT License",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -68,7 +67,7 @@ line-length = 120
 [tool.ruff.lint]
 select = ["E", "F", "I", "UP", "B", "SIM"]
-ignore = ["B008", "B904"]
+ignore = ["B008", "B904", "SIM102", "SIM105", "SIM108", "SIM117", "UP037", "UP038"]
 [tool.mypy]
 python_version = "3.10"

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """Regression psystack harness for ML pipelines."""
-__version__ = "0.1.0"
+__version__ = "0.1.1"

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/controllers.py RENAMED Viewed

@@ -29,7 +29,7 @@ class ScriptedControllerAdapter:
     def _make_controller(self) -> Any:
         """Instantiate the controller, passing track if the constructor accepts it."""
-        sig = inspect.signature(self._controller_cls.__init__)
+        sig = inspect.signature(self._controller_cls)
         if "track" in sig.parameters and self._track is not None:
             return self._controller_cls(self._track)
         return self._controller_cls()
@@ -52,5 +52,5 @@ class ScriptedControllerAdapter:
     def act(self, obs: dict[str, Any], car_state: dict[str, Any] | None = None) -> np.ndarray:
         """Delegate to the underlying controller's __call__."""
         if self._accepts_car_state:
-            return self._controller(obs, car_state=car_state)
-        return self._controller(obs)
+            return self._controller(obs, car_state=car_state)  # type: ignore[no-any-return]
+        return self._controller(obs)  # type: ignore[no-any-return]

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/env.py RENAMED Viewed

@@ -42,7 +42,9 @@ class F1EnvAdapter:
         return self._env.step(action)
     def get_car_state(self) -> dict[str, Any]:
-        return self._env.get_car_state()
+        assert self._env is not None, "configure() must be called first"
+        return self._env.get_car_state()  # type: ignore[no-any-return]
     def get_progress(self) -> float:
-        return self._env.get_progress()
+        assert self._env is not None, "configure() must be called first"
+        return self._env.get_progress()  # type: ignore[no-any-return]

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/signals.py RENAMED Viewed

@@ -2,7 +2,7 @@
 from __future__ import annotations
-from typing import Any, TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from psystack.core.signal_schema import LivePairTelemetryView, SignalSchema
@@ -10,7 +10,6 @@ if TYPE_CHECKING:
 import math
 SIGNAL_GROUPS: dict[str, list[str]] = {
     "Core": ["steering", "throttle", "brake", "speed", "heading"],
     "LiDAR": ["lidar_front", "lidar_left", "lidar_right", "lidar_min"],
@@ -153,7 +152,7 @@ class F1SignalTranslator:
             "speed_delta": speed_delta,
         }
-    def signal_schema(self) -> "SignalSchema":
+    def signal_schema(self) -> SignalSchema:
         """Return structured signal schema with thresholds."""
         from psystack.core.signal_schema import SignalDef, SignalSchema
@@ -308,7 +307,7 @@ class F1SignalTranslator:
         return rows
-    def format_live_pair(self, frame: "LivePairFrame") -> "LivePairTelemetryView":
+    def format_live_pair(self, frame: LivePairFrame) -> LivePairTelemetryView:
         """Format a LivePairFrame into adapter-specific telemetry view (4C)."""
         from psystack.core.signal_schema import LivePairTelemetryView

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/world_model.py RENAMED Viewed

@@ -20,11 +20,12 @@ class F1WorldModelAdapter:
         from models.world_model import WorldModel
         self._device = device
-        self._model = WorldModel()
+        model = WorldModel()
         state_dict = torch.load(weights_path, map_location=device, weights_only=True)
-        self._model.load_state_dict(state_dict)
-        self._model.to(device)
-        self._model.eval()
+        model.load_state_dict(state_dict)
+        model.to(device)
+        model.eval()
+        self._model = model
     def encode(self, obs: dict[str, Any]) -> Any:
         if self._model is None:
@@ -35,12 +36,14 @@ class F1WorldModelAdapter:
             return self._model.encode(raster, aux)
     def encode_target(self, obs: dict[str, Any]) -> Any:
+        assert self._model is not None, "load() must be called first"
         raster = self._to_raster_tensor(obs)
         aux = self._to_aux_tensor(obs)
         with torch.no_grad():
             return self._model.get_target(raster, aux)
     def predict(self, latent: Any, action: np.ndarray) -> Any:
+        assert self._model is not None, "load() must be called first"
         action_t = torch.tensor(action, dtype=torch.float32, device=self._device)
         if action_t.dim() == 1:
             action_t = action_t.unsqueeze(0)
@@ -48,17 +51,19 @@ class F1WorldModelAdapter:
             return self._model.predict(latent, action_t)
     def predict_progress(self, latent: Any) -> float:
+        assert self._model is not None, "load() must be called first"
         with torch.no_grad():
-            return self._model.progress_head(latent).item()
+            return self._model.progress_head(latent).item()  # type: ignore[no-any-return]
     def predict_offtrack(self, latent: Any) -> float:
+        assert self._model is not None, "load() must be called first"
         with torch.no_grad():
-            return torch.sigmoid(self._model.offtrack_head(latent)).item()
+            return torch.sigmoid(self._model.offtrack_head(latent)).item()  # type: ignore[no-any-return]
     def get_raw_model(self) -> Any:
         return self._model
-    def _to_raster_tensor(self, obs: dict[str, Any]) -> torch.Tensor:
+    def _to_raster_tensor(self, obs: dict[str, Any]) -> Any:
         raster = obs["raster"]
         if isinstance(raster, np.ndarray):
             raster = torch.tensor(raster, dtype=torch.float32, device=self._device)
@@ -66,7 +71,7 @@ class F1WorldModelAdapter:
             raster = raster.unsqueeze(0)
         return raster
-    def _to_aux_tensor(self, obs: dict[str, Any]) -> torch.Tensor:
+    def _to_aux_tensor(self, obs: dict[str, Any]) -> Any:
         aux = obs["aux"]
         if isinstance(aux, np.ndarray):
             aux = torch.tensor(aux, dtype=torch.float32, device=self._device)

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/questions.py RENAMED Viewed

@@ -16,7 +16,7 @@ def prompt_adapter(available: list[str]) -> str:
     """Select adapter from registered adapters."""
     if len(available) == 1:
         return available[0]
-    return inquirer.select(
+    return inquirer.select(  # type: ignore[no-any-return]
         message="Adapter type:",
         choices=available,
         default=available[0],
@@ -42,7 +42,7 @@ def prompt_weights(weights: list[dict[str, Any]], role: str, default_idx: int) -
         {"name": f"{w['name']}  ({w['size_mb']} MB, {w['mtime']})", "value": i}
         for i, w in enumerate(weights)
     ]
-    return inquirer.select(
+    return inquirer.select(  # type: ignore[no-any-return]
         message=f"{role} weight:",
         choices=choices,
         default=default_idx,
@@ -57,7 +57,7 @@ def prompt_change_type() -> ChangeType:
         {"name": "Both weights and planner config", "value": ChangeType.BOTH},
         {"name": "Other / not sure", "value": ChangeType.OTHER},
     ]
-    return inquirer.select(
+    return inquirer.select(  # type: ignore[no-any-return]
         message="What changed between baseline and candidate?",
         choices=choices,
         default=ChangeType.WEIGHTS_ONLY,
@@ -85,7 +85,7 @@ def prompt_env(envs: list[str]) -> str:
         if e.lower() == "monza":
             default = e
             break
-    return inquirer.select(
+    return inquirer.select(  # type: ignore[no-any-return]
         message="Environment:",
         choices=envs,
         default=default,

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/review.py RENAMED Viewed

@@ -48,7 +48,7 @@ def display_review(answers: InitAnswers) -> None:
 def confirm_write() -> bool:
     """Ask user to confirm before writing files."""
-    return inquirer.confirm(
+    return inquirer.confirm(  # type: ignore[no-any-return]
         message="Write config and manifests?",
         default=True,
     ).execute()

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/core/contracts.py RENAMED Viewed

@@ -59,7 +59,7 @@ class SignalTranslator(Protocol):
     ) -> dict[str, float]: ...
     # Optional (checked via hasattr at call sites)
-    def signal_schema(self) -> "SignalSchema": ...
+    def signal_schema(self) -> Any: ...
     def analyze_segment(
         self,

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/metrics/__init__.py RENAMED Viewed

@@ -1,10 +1,12 @@
+from psystack.core.contracts import MetricPlugin
 from .offtrack import OffTrackRateMetric
 from .prediction_error import WorldModelPredictionError
 from .progress import ProgressMetric
 from .reward import CumulativeRewardMetric
 from .survival import SurvivalStepsMetric
-ALL_METRICS = [
+ALL_METRICS: list[MetricPlugin] = [
     ProgressMetric(),
     OffTrackRateMetric(),
     SurvivalStepsMetric(),

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/episode.py RENAMED Viewed

@@ -6,7 +6,6 @@ from typing import Literal
 from pydantic import BaseModel, ConfigDict, Field
 # ── Display name mapping ─────────────────────────────────────────────────────
 METRIC_DISPLAY_NAMES: dict[str, str] = {

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/run.py RENAMED Viewed

@@ -35,7 +35,7 @@ class Run(BaseModel):
             planner_config=manifest.planner_config,
             env_config=manifest.env_config,
             seed=manifest.seed,
-            num_episodes=manifest.num_episodes,
+            num_episodes=manifest.num_episodes,  # type: ignore[call-arg]
         )
     def to_manifest(self) -> RunManifest:

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/compare_module.py RENAMED Viewed

@@ -13,7 +13,6 @@ from pydantic import BaseModel, Field
 from psystack.models.episode import METRIC_DISPLAY_NAMES, EpisodeOutcome, EpisodeRecord
 # ── Models ──────────────────────────────────────────────────────────────────

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/event_extraction.py RENAMED Viewed

@@ -2,7 +2,7 @@
 from __future__ import annotations
-from typing import Any
+from typing import Any, Literal
 from psystack.models.event import Event
 from psystack.models.signal import SignalValue
@@ -175,6 +175,7 @@ def _emit_divergence_window(
     duration = end - start + 1
     # Severity based on duration and peak delta
+    severity: Literal["info", "warning", "critical"]
     if duration > 20 or peak_delta > 0.5:
         severity = "critical"
     elif duration > 8 or peak_delta > 0.3:
@@ -213,12 +214,12 @@ def _extract_risk_spikes(
         worst = max(ot_a, ot_b)
         if worst >= _RISK_THRESHOLD and (i - last_spike_step) > _RISK_COOLDOWN:
-            severity = "critical" if worst >= 8 else "warning"
+            sev: Literal["info", "warning", "critical"] = "critical" if worst >= 8 else "warning"
             events.append(Event(
                 id=f"{episode_id}_risk_spike_{spike_idx}_{i}",
                 type="risk_spike",
                 step=i,
-                severity=severity,
+                severity=sev,
                 score=worst / 10.0,
                 active_signals=[SignalValue(name="offtrack_risk", value=worst)],
             ))

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/paired_runner.py RENAMED Viewed

@@ -104,7 +104,7 @@ def run_paired_episodes(
                     step=tick, progress=float(progress_a), reward=float(reward_a),
                     done=done_a,
                     termination=info_a.get("termination") if done_a else None,
-                    state=car_state_a, action=action_list_a, info=_serialize_info(info_a),
+                    state=car_state_a, action=action_list_a, info=_serialize_info(info_a),  # type: ignore[arg-type]
                 )
             if cancel_event is not None and cancel_event.is_set():
@@ -138,7 +138,7 @@ def run_paired_episodes(
                     step=tick, progress=float(progress_b), reward=float(reward_b),
                     done=done_b,
                     termination=info_b.get("termination") if done_b else None,
-                    state=car_state_b, action=action_list_b, info=_serialize_info(info_b),
+                    state=car_state_b, action=action_list_b, info=_serialize_info(info_b),  # type: ignore[arg-type]
                 )
             # Emit pair frame

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/runner.py RENAMED Viewed

@@ -3,8 +3,9 @@
 from __future__ import annotations
 import contextlib
+from collections.abc import Sequence
 from datetime import datetime, timezone
-from typing import Protocol, runtime_checkable
+from typing import Any, Protocol, runtime_checkable
 from psystack.pipeline.context import RunContext
 from psystack.pipeline.stages.base import Stage
@@ -35,7 +36,7 @@ def utc_now() -> str:
 def run_stages(
     ctx: RunContext,
-    stages: tuple[Stage, ...],
+    stages: Sequence[Stage],
     *,
     observer: StageObserver | None = None,
 ) -> None:
@@ -43,6 +44,7 @@ def run_stages(
     all_names = [stage.name for stage in stages]
     # Only use Rich Progress when no observer (CLI mode)
+    progress_ctx: Any
     if observer is None:
         from rich.progress import Progress
         progress_ctx = Progress()

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/events.py RENAMED Viewed

@@ -49,5 +49,5 @@ class EventStage:
         """Load episode data from the compare stage output."""
         episodes_path = ctx.workspace / condition / "episodes.json"
         if episodes_path.exists():
-            return json.loads(episodes_path.read_text())
+            return json.loads(episodes_path.read_text())  # type: ignore[no-any-return]
         return []

{psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/workspace.py RENAMED Viewed

@@ -25,7 +25,6 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 # -- Error Categories (D-07) --
 # These document the 5 error categories from D-07. Used as the error_category
 # argument to save_failed_attempt(). Callers should use these constants rather
@@ -72,7 +71,7 @@ def read_workspace_state(workspace: Path) -> dict[str, Any]:
     """Read workspace_state.json. Returns default state if missing."""
     state_path = workspace / "workspace_state.json"
     if state_path.exists():
-        return json.loads(state_path.read_text())
+        return json.loads(state_path.read_text())  # type: ignore[no-any-return]
     return {"case_state": "draft", "attempts": []}
@@ -150,7 +149,7 @@ def load_result(workspace: Path) -> dict[str, Any] | None:
     result_path = workspace / "analysis" / "result.json"
     if not result_path.exists():
         return None
-    return json.loads(result_path.read_text())
+    return json.loads(result_path.read_text())  # type: ignore[no-any-return]
 class _NumpyEncoder(json.JSONEncoder):

psystack 0.1.0__tar.gz → 0.1.1__tar.gz

psystack 0.1.0.tar.gz → 0.1.1.tar.gz