psystack 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psystack-0.1.1/PKG-INFO +98 -0
- psystack-0.1.1/README.md +59 -0
- {psystack-0.1.0 → psystack-0.1.1}/pyproject.toml +3 -4
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/__init__.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/controllers.py +3 -3
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/env.py +4 -2
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/signals.py +3 -4
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/world_model.py +13 -8
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/questions.py +4 -4
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/review.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/core/contracts.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/metrics/__init__.py +3 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/episode.py +0 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/run.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/compare_module.py +0 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/event_extraction.py +4 -3
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/paired_runner.py +2 -2
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/runner.py +4 -2
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/events.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/workspace.py +2 -3
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/app.py +37 -30
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/case_history.py +2 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/case_verdict.py +8 -6
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/drawers/context_drawer.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/drawers/evidence_drawer.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/investigation.py +5 -6
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/run_builder.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/services.py +15 -23
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/action_bar.py +4 -2
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/case_bar.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/causal_sequence.py +0 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/event_navigator.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/falsifier_list.py +0 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/help_overlay.py +0 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/isolation_case_table.py +1 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/live_run_monitor.py +2 -2
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/metric_detail.py +3 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/metric_table.py +3 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/progress_summary.py +5 -6
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/status_badge.py +3 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/step_inspector.py +6 -6
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/tier_indicator.py +0 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/track_map.py +0 -1
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/verdict_strip.py +1 -1
- psystack-0.1.1/src/psystack.egg-info/PKG-INFO +98 -0
- psystack-0.1.0/PKG-INFO +0 -42
- psystack-0.1.0/README.md +0 -2
- psystack-0.1.0/src/psystack.egg-info/PKG-INFO +0 -42
- {psystack-0.1.0 → psystack-0.1.1}/LICENSE +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/setup.cfg +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/__main__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/degrade.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/factory.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/live_viewer.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/f1/planner.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/adapters/registry.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/app.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/version_check.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/discovery.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/models.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/cli/wizard/service.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/core/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/core/config.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/core/signal_schema.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/metrics/offtrack.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/metrics/prediction_error.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/metrics/progress.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/metrics/reward.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/evaluation/metrics/survival.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/case.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/comparison.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/evaluation_result.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/event.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/evidence.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/explanation.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/isolation.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/manifest.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/metric.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/project.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/signal.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/models/swap.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/case_io.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/compare/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/compare/decision.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/compare/execution.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/compare/service.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/compare/stats.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/context.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/episodes.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/events/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/events/config.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/events/detection.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/events/divergence.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/isolation/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/isolation/attribution.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/isolation/designs.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/isolation/executor.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/isolation/planner.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/live_update.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/metrics_util.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/attribute.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/base.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/compare.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/isolate.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/stages/report.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/staleness.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/pipeline/state.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/bundle.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/evidence.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/renderers/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/renderers/console.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/renderers/html.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/renderers/json.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/templates/investigation_report.html.j2 +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/templates/report.html.j2 +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/reporting/types.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/actions.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/detection.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/attribution.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/command_palette.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/drawers/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/error_modal.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/screens/workspace_picker.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/state.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/styles/app.tcss +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/views/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/__init__.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/artifact_list.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/artifact_preview.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/attribution_decision_card.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/comparability_summary.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/context_rail.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/effect_table.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/explanation_card.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/focus_signals_strip.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/isolation_case_detail.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/run_config_panel.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/run_monitor.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/section_title.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/signal_timeline.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack/tui/widgets/transport_bar.py +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack.egg-info/SOURCES.txt +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack.egg-info/dependency_links.txt +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack.egg-info/entry_points.txt +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack.egg-info/requires.txt +0 -0
- {psystack-0.1.0 → psystack-0.1.1}/src/psystack.egg-info/top_level.txt +0 -0
psystack-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: psystack
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Regression investigation harness for ML pipelines
|
|
5
|
+
Author: Danny Nguyen
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/PsyStack/PsyStack
|
|
8
|
+
Project-URL: Issues, https://github.com/PsyStack/PsyStack/issues
|
|
9
|
+
Keywords: regression,investigation,ml,machine-learning,debugging,pipeline
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: typer<1,>=0.9.0
|
|
19
|
+
Requires-Dist: pydantic<3,>=2.0.0
|
|
20
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
21
|
+
Requires-Dist: jinja2>=3.1.0
|
|
22
|
+
Requires-Dist: tomli-w>=1.0.0
|
|
23
|
+
Requires-Dist: InquirerPy>=0.3.4
|
|
24
|
+
Requires-Dist: rich>=13.0.0
|
|
25
|
+
Requires-Dist: torch<3,>=2.0.0
|
|
26
|
+
Requires-Dist: numpy<3,>=1.24.0
|
|
27
|
+
Requires-Dist: scipy>=1.11.0
|
|
28
|
+
Requires-Dist: statsmodels>=0.14.0
|
|
29
|
+
Requires-Dist: textual>=0.80.0
|
|
30
|
+
Requires-Dist: tomli>=2.0.0; python_version < "3.11"
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-textual-snapshot; extra == "dev"
|
|
34
|
+
Requires-Dist: hypothesis; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
36
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
37
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# PsyStack
|
|
41
|
+
|
|
42
|
+
A regression investigation harness for ML pipelines, latent action systems, and world models.
|
|
43
|
+
|
|
44
|
+
PsyStack runs paired A/B evaluations across ML/world model experiments, detects regressions, and provides an interactive investigation workbench for debugging episode-level divergences.
|
|
45
|
+
|
|
46
|
+
## What it does
|
|
47
|
+
|
|
48
|
+
- Configure A/B experiment pairs with different weights, planners, or configs
|
|
49
|
+
- Run paired evaluations with live telemetry
|
|
50
|
+
- Detect regressions via bootstrap significance testing across 5 metrics
|
|
51
|
+
- Drill into individual episodes with signal timelines and event detection
|
|
52
|
+
|
|
53
|
+
## Quickstart
|
|
54
|
+
|
|
55
|
+
Python 3.10+ required.
|
|
56
|
+
|
|
57
|
+
### Install
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install psystack
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Prerequisites: PsyStack requires an ML repo with a compatible adapter.
|
|
64
|
+
|
|
65
|
+
The only available adapter is for [f1worldmodel](https://github.com/justinsiek/f1worldmodel).
|
|
66
|
+
|
|
67
|
+
Clone the repo and follow its README to install dependencies.
|
|
68
|
+
|
|
69
|
+
### Run
|
|
70
|
+
|
|
71
|
+
`cd` into the ML repo's project root:
|
|
72
|
+
|
|
73
|
+
Example:
|
|
74
|
+
```bash
|
|
75
|
+
cd f1worldmodel
|
|
76
|
+
psystack
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
This launches the TUI. From there:
|
|
80
|
+
|
|
81
|
+
1. Select or create a case
|
|
82
|
+
2. Pick a track, configure Run A (baseline) and Run B (candidate) with different checkpoints or planner settings
|
|
83
|
+
3. Run the evaluation
|
|
84
|
+
4. View the verdict and drill into episodes to investigate divergences
|
|
85
|
+
|
|
86
|
+
## Available Adapters
|
|
87
|
+
|
|
88
|
+
| Adapter | Repo | Status |
|
|
89
|
+
|---------|------|--------|
|
|
90
|
+
| `f1` | [justinsiek/f1worldmodel](https://github.com/justinsiek/f1worldmodel) | Beta |
|
|
91
|
+
|
|
92
|
+
## Status
|
|
93
|
+
|
|
94
|
+
Beta. APIs may change.
|
|
95
|
+
|
|
96
|
+
## License
|
|
97
|
+
|
|
98
|
+
MIT
|
psystack-0.1.1/README.md
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# PsyStack
|
|
2
|
+
|
|
3
|
+
A regression investigation harness for ML pipelines, latent action systems, and world models.
|
|
4
|
+
|
|
5
|
+
PsyStack runs paired A/B evaluations across ML/world model experiments, detects regressions, and provides an interactive investigation workbench for debugging episode-level divergences.
|
|
6
|
+
|
|
7
|
+
## What it does
|
|
8
|
+
|
|
9
|
+
- Configure A/B experiment pairs with different weights, planners, or configs
|
|
10
|
+
- Run paired evaluations with live telemetry
|
|
11
|
+
- Detect regressions via bootstrap significance testing across 5 metrics
|
|
12
|
+
- Drill into individual episodes with signal timelines and event detection
|
|
13
|
+
|
|
14
|
+
## Quickstart
|
|
15
|
+
|
|
16
|
+
Python 3.10+ required.
|
|
17
|
+
|
|
18
|
+
### Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install psystack
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Prerequisites: PsyStack requires an ML repo with a compatible adapter.
|
|
25
|
+
|
|
26
|
+
The only available adapter is for [f1worldmodel](https://github.com/justinsiek/f1worldmodel).
|
|
27
|
+
|
|
28
|
+
Clone the repo and follow its README to install dependencies.
|
|
29
|
+
|
|
30
|
+
### Run
|
|
31
|
+
|
|
32
|
+
`cd` into the ML repo's project root:
|
|
33
|
+
|
|
34
|
+
Example:
|
|
35
|
+
```bash
|
|
36
|
+
cd f1worldmodel
|
|
37
|
+
psystack
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
This launches the TUI. From there:
|
|
41
|
+
|
|
42
|
+
1. Select or create a case
|
|
43
|
+
2. Pick a track, configure Run A (baseline) and Run B (candidate) with different checkpoints or planner settings
|
|
44
|
+
3. Run the evaluation
|
|
45
|
+
4. View the verdict and drill into episodes to investigate divergences
|
|
46
|
+
|
|
47
|
+
## Available Adapters
|
|
48
|
+
|
|
49
|
+
| Adapter | Repo | Status |
|
|
50
|
+
|---------|------|--------|
|
|
51
|
+
| `f1` | [justinsiek/f1worldmodel](https://github.com/justinsiek/f1worldmodel) | Beta |
|
|
52
|
+
|
|
53
|
+
## Status
|
|
54
|
+
|
|
55
|
+
Beta. APIs may change.
|
|
56
|
+
|
|
57
|
+
## License
|
|
58
|
+
|
|
59
|
+
MIT
|
|
@@ -4,16 +4,15 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "psystack"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.1.1"
|
|
8
8
|
description = "Regression investigation harness for ML pipelines"
|
|
9
9
|
requires-python = ">=3.10"
|
|
10
|
-
license =
|
|
10
|
+
license = "MIT"
|
|
11
11
|
authors = [{name = "Danny Nguyen"}]
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
keywords = ["regression", "investigation", "ml", "machine-learning", "debugging", "pipeline"]
|
|
14
14
|
classifiers = [
|
|
15
15
|
"Development Status :: 4 - Beta",
|
|
16
|
-
"License :: OSI Approved :: MIT License",
|
|
17
16
|
"Programming Language :: Python :: 3",
|
|
18
17
|
"Programming Language :: Python :: 3.10",
|
|
19
18
|
"Programming Language :: Python :: 3.11",
|
|
@@ -68,7 +67,7 @@ line-length = 120
|
|
|
68
67
|
|
|
69
68
|
[tool.ruff.lint]
|
|
70
69
|
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
71
|
-
ignore = ["B008", "B904"]
|
|
70
|
+
ignore = ["B008", "B904", "SIM102", "SIM105", "SIM108", "SIM117", "UP037", "UP038"]
|
|
72
71
|
|
|
73
72
|
[tool.mypy]
|
|
74
73
|
python_version = "3.10"
|
|
@@ -29,7 +29,7 @@ class ScriptedControllerAdapter:
|
|
|
29
29
|
|
|
30
30
|
def _make_controller(self) -> Any:
|
|
31
31
|
"""Instantiate the controller, passing track if the constructor accepts it."""
|
|
32
|
-
sig = inspect.signature(self._controller_cls
|
|
32
|
+
sig = inspect.signature(self._controller_cls)
|
|
33
33
|
if "track" in sig.parameters and self._track is not None:
|
|
34
34
|
return self._controller_cls(self._track)
|
|
35
35
|
return self._controller_cls()
|
|
@@ -52,5 +52,5 @@ class ScriptedControllerAdapter:
|
|
|
52
52
|
def act(self, obs: dict[str, Any], car_state: dict[str, Any] | None = None) -> np.ndarray:
|
|
53
53
|
"""Delegate to the underlying controller's __call__."""
|
|
54
54
|
if self._accepts_car_state:
|
|
55
|
-
return self._controller(obs, car_state=car_state)
|
|
56
|
-
return self._controller(obs)
|
|
55
|
+
return self._controller(obs, car_state=car_state) # type: ignore[no-any-return]
|
|
56
|
+
return self._controller(obs) # type: ignore[no-any-return]
|
|
@@ -42,7 +42,9 @@ class F1EnvAdapter:
|
|
|
42
42
|
return self._env.step(action)
|
|
43
43
|
|
|
44
44
|
def get_car_state(self) -> dict[str, Any]:
|
|
45
|
-
|
|
45
|
+
assert self._env is not None, "configure() must be called first"
|
|
46
|
+
return self._env.get_car_state() # type: ignore[no-any-return]
|
|
46
47
|
|
|
47
48
|
def get_progress(self) -> float:
|
|
48
|
-
|
|
49
|
+
assert self._env is not None, "configure() must be called first"
|
|
50
|
+
return self._env.get_progress() # type: ignore[no-any-return]
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
6
|
|
|
7
7
|
if TYPE_CHECKING:
|
|
8
8
|
from psystack.core.signal_schema import LivePairTelemetryView, SignalSchema
|
|
@@ -10,7 +10,6 @@ if TYPE_CHECKING:
|
|
|
10
10
|
|
|
11
11
|
import math
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
SIGNAL_GROUPS: dict[str, list[str]] = {
|
|
15
14
|
"Core": ["steering", "throttle", "brake", "speed", "heading"],
|
|
16
15
|
"LiDAR": ["lidar_front", "lidar_left", "lidar_right", "lidar_min"],
|
|
@@ -153,7 +152,7 @@ class F1SignalTranslator:
|
|
|
153
152
|
"speed_delta": speed_delta,
|
|
154
153
|
}
|
|
155
154
|
|
|
156
|
-
def signal_schema(self) ->
|
|
155
|
+
def signal_schema(self) -> SignalSchema:
|
|
157
156
|
"""Return structured signal schema with thresholds."""
|
|
158
157
|
from psystack.core.signal_schema import SignalDef, SignalSchema
|
|
159
158
|
|
|
@@ -308,7 +307,7 @@ class F1SignalTranslator:
|
|
|
308
307
|
|
|
309
308
|
return rows
|
|
310
309
|
|
|
311
|
-
def format_live_pair(self, frame:
|
|
310
|
+
def format_live_pair(self, frame: LivePairFrame) -> LivePairTelemetryView:
|
|
312
311
|
"""Format a LivePairFrame into adapter-specific telemetry view (4C)."""
|
|
313
312
|
from psystack.core.signal_schema import LivePairTelemetryView
|
|
314
313
|
|
|
@@ -20,11 +20,12 @@ class F1WorldModelAdapter:
|
|
|
20
20
|
from models.world_model import WorldModel
|
|
21
21
|
|
|
22
22
|
self._device = device
|
|
23
|
-
|
|
23
|
+
model = WorldModel()
|
|
24
24
|
state_dict = torch.load(weights_path, map_location=device, weights_only=True)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
model.load_state_dict(state_dict)
|
|
26
|
+
model.to(device)
|
|
27
|
+
model.eval()
|
|
28
|
+
self._model = model
|
|
28
29
|
|
|
29
30
|
def encode(self, obs: dict[str, Any]) -> Any:
|
|
30
31
|
if self._model is None:
|
|
@@ -35,12 +36,14 @@ class F1WorldModelAdapter:
|
|
|
35
36
|
return self._model.encode(raster, aux)
|
|
36
37
|
|
|
37
38
|
def encode_target(self, obs: dict[str, Any]) -> Any:
|
|
39
|
+
assert self._model is not None, "load() must be called first"
|
|
38
40
|
raster = self._to_raster_tensor(obs)
|
|
39
41
|
aux = self._to_aux_tensor(obs)
|
|
40
42
|
with torch.no_grad():
|
|
41
43
|
return self._model.get_target(raster, aux)
|
|
42
44
|
|
|
43
45
|
def predict(self, latent: Any, action: np.ndarray) -> Any:
|
|
46
|
+
assert self._model is not None, "load() must be called first"
|
|
44
47
|
action_t = torch.tensor(action, dtype=torch.float32, device=self._device)
|
|
45
48
|
if action_t.dim() == 1:
|
|
46
49
|
action_t = action_t.unsqueeze(0)
|
|
@@ -48,17 +51,19 @@ class F1WorldModelAdapter:
|
|
|
48
51
|
return self._model.predict(latent, action_t)
|
|
49
52
|
|
|
50
53
|
def predict_progress(self, latent: Any) -> float:
|
|
54
|
+
assert self._model is not None, "load() must be called first"
|
|
51
55
|
with torch.no_grad():
|
|
52
|
-
return self._model.progress_head(latent).item()
|
|
56
|
+
return self._model.progress_head(latent).item() # type: ignore[no-any-return]
|
|
53
57
|
|
|
54
58
|
def predict_offtrack(self, latent: Any) -> float:
|
|
59
|
+
assert self._model is not None, "load() must be called first"
|
|
55
60
|
with torch.no_grad():
|
|
56
|
-
return torch.sigmoid(self._model.offtrack_head(latent)).item()
|
|
61
|
+
return torch.sigmoid(self._model.offtrack_head(latent)).item() # type: ignore[no-any-return]
|
|
57
62
|
|
|
58
63
|
def get_raw_model(self) -> Any:
|
|
59
64
|
return self._model
|
|
60
65
|
|
|
61
|
-
def _to_raster_tensor(self, obs: dict[str, Any]) ->
|
|
66
|
+
def _to_raster_tensor(self, obs: dict[str, Any]) -> Any:
|
|
62
67
|
raster = obs["raster"]
|
|
63
68
|
if isinstance(raster, np.ndarray):
|
|
64
69
|
raster = torch.tensor(raster, dtype=torch.float32, device=self._device)
|
|
@@ -66,7 +71,7 @@ class F1WorldModelAdapter:
|
|
|
66
71
|
raster = raster.unsqueeze(0)
|
|
67
72
|
return raster
|
|
68
73
|
|
|
69
|
-
def _to_aux_tensor(self, obs: dict[str, Any]) ->
|
|
74
|
+
def _to_aux_tensor(self, obs: dict[str, Any]) -> Any:
|
|
70
75
|
aux = obs["aux"]
|
|
71
76
|
if isinstance(aux, np.ndarray):
|
|
72
77
|
aux = torch.tensor(aux, dtype=torch.float32, device=self._device)
|
|
@@ -16,7 +16,7 @@ def prompt_adapter(available: list[str]) -> str:
|
|
|
16
16
|
"""Select adapter from registered adapters."""
|
|
17
17
|
if len(available) == 1:
|
|
18
18
|
return available[0]
|
|
19
|
-
return inquirer.select(
|
|
19
|
+
return inquirer.select( # type: ignore[no-any-return]
|
|
20
20
|
message="Adapter type:",
|
|
21
21
|
choices=available,
|
|
22
22
|
default=available[0],
|
|
@@ -42,7 +42,7 @@ def prompt_weights(weights: list[dict[str, Any]], role: str, default_idx: int) -
|
|
|
42
42
|
{"name": f"{w['name']} ({w['size_mb']} MB, {w['mtime']})", "value": i}
|
|
43
43
|
for i, w in enumerate(weights)
|
|
44
44
|
]
|
|
45
|
-
return inquirer.select(
|
|
45
|
+
return inquirer.select( # type: ignore[no-any-return]
|
|
46
46
|
message=f"{role} weight:",
|
|
47
47
|
choices=choices,
|
|
48
48
|
default=default_idx,
|
|
@@ -57,7 +57,7 @@ def prompt_change_type() -> ChangeType:
|
|
|
57
57
|
{"name": "Both weights and planner config", "value": ChangeType.BOTH},
|
|
58
58
|
{"name": "Other / not sure", "value": ChangeType.OTHER},
|
|
59
59
|
]
|
|
60
|
-
return inquirer.select(
|
|
60
|
+
return inquirer.select( # type: ignore[no-any-return]
|
|
61
61
|
message="What changed between baseline and candidate?",
|
|
62
62
|
choices=choices,
|
|
63
63
|
default=ChangeType.WEIGHTS_ONLY,
|
|
@@ -85,7 +85,7 @@ def prompt_env(envs: list[str]) -> str:
|
|
|
85
85
|
if e.lower() == "monza":
|
|
86
86
|
default = e
|
|
87
87
|
break
|
|
88
|
-
return inquirer.select(
|
|
88
|
+
return inquirer.select( # type: ignore[no-any-return]
|
|
89
89
|
message="Environment:",
|
|
90
90
|
choices=envs,
|
|
91
91
|
default=default,
|
|
@@ -48,7 +48,7 @@ def display_review(answers: InitAnswers) -> None:
|
|
|
48
48
|
|
|
49
49
|
def confirm_write() -> bool:
|
|
50
50
|
"""Ask user to confirm before writing files."""
|
|
51
|
-
return inquirer.confirm(
|
|
51
|
+
return inquirer.confirm( # type: ignore[no-any-return]
|
|
52
52
|
message="Write config and manifests?",
|
|
53
53
|
default=True,
|
|
54
54
|
).execute()
|
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
from psystack.core.contracts import MetricPlugin
|
|
2
|
+
|
|
1
3
|
from .offtrack import OffTrackRateMetric
|
|
2
4
|
from .prediction_error import WorldModelPredictionError
|
|
3
5
|
from .progress import ProgressMetric
|
|
4
6
|
from .reward import CumulativeRewardMetric
|
|
5
7
|
from .survival import SurvivalStepsMetric
|
|
6
8
|
|
|
7
|
-
ALL_METRICS = [
|
|
9
|
+
ALL_METRICS: list[MetricPlugin] = [
|
|
8
10
|
ProgressMetric(),
|
|
9
11
|
OffTrackRateMetric(),
|
|
10
12
|
SurvivalStepsMetric(),
|
|
@@ -35,7 +35,7 @@ class Run(BaseModel):
|
|
|
35
35
|
planner_config=manifest.planner_config,
|
|
36
36
|
env_config=manifest.env_config,
|
|
37
37
|
seed=manifest.seed,
|
|
38
|
-
num_episodes=manifest.num_episodes,
|
|
38
|
+
num_episodes=manifest.num_episodes, # type: ignore[call-arg]
|
|
39
39
|
)
|
|
40
40
|
|
|
41
41
|
def to_manifest(self) -> RunManifest:
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any, Literal
|
|
6
6
|
|
|
7
7
|
from psystack.models.event import Event
|
|
8
8
|
from psystack.models.signal import SignalValue
|
|
@@ -175,6 +175,7 @@ def _emit_divergence_window(
|
|
|
175
175
|
duration = end - start + 1
|
|
176
176
|
|
|
177
177
|
# Severity based on duration and peak delta
|
|
178
|
+
severity: Literal["info", "warning", "critical"]
|
|
178
179
|
if duration > 20 or peak_delta > 0.5:
|
|
179
180
|
severity = "critical"
|
|
180
181
|
elif duration > 8 or peak_delta > 0.3:
|
|
@@ -213,12 +214,12 @@ def _extract_risk_spikes(
|
|
|
213
214
|
worst = max(ot_a, ot_b)
|
|
214
215
|
|
|
215
216
|
if worst >= _RISK_THRESHOLD and (i - last_spike_step) > _RISK_COOLDOWN:
|
|
216
|
-
|
|
217
|
+
sev: Literal["info", "warning", "critical"] = "critical" if worst >= 8 else "warning"
|
|
217
218
|
events.append(Event(
|
|
218
219
|
id=f"{episode_id}_risk_spike_{spike_idx}_{i}",
|
|
219
220
|
type="risk_spike",
|
|
220
221
|
step=i,
|
|
221
|
-
severity=
|
|
222
|
+
severity=sev,
|
|
222
223
|
score=worst / 10.0,
|
|
223
224
|
active_signals=[SignalValue(name="offtrack_risk", value=worst)],
|
|
224
225
|
))
|
|
@@ -104,7 +104,7 @@ def run_paired_episodes(
|
|
|
104
104
|
step=tick, progress=float(progress_a), reward=float(reward_a),
|
|
105
105
|
done=done_a,
|
|
106
106
|
termination=info_a.get("termination") if done_a else None,
|
|
107
|
-
state=car_state_a, action=action_list_a, info=_serialize_info(info_a),
|
|
107
|
+
state=car_state_a, action=action_list_a, info=_serialize_info(info_a), # type: ignore[arg-type]
|
|
108
108
|
)
|
|
109
109
|
|
|
110
110
|
if cancel_event is not None and cancel_event.is_set():
|
|
@@ -138,7 +138,7 @@ def run_paired_episodes(
|
|
|
138
138
|
step=tick, progress=float(progress_b), reward=float(reward_b),
|
|
139
139
|
done=done_b,
|
|
140
140
|
termination=info_b.get("termination") if done_b else None,
|
|
141
|
-
state=car_state_b, action=action_list_b, info=_serialize_info(info_b),
|
|
141
|
+
state=car_state_b, action=action_list_b, info=_serialize_info(info_b), # type: ignore[arg-type]
|
|
142
142
|
)
|
|
143
143
|
|
|
144
144
|
# Emit pair frame
|
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import contextlib
|
|
6
|
+
from collections.abc import Sequence
|
|
6
7
|
from datetime import datetime, timezone
|
|
7
|
-
from typing import Protocol, runtime_checkable
|
|
8
|
+
from typing import Any, Protocol, runtime_checkable
|
|
8
9
|
|
|
9
10
|
from psystack.pipeline.context import RunContext
|
|
10
11
|
from psystack.pipeline.stages.base import Stage
|
|
@@ -35,7 +36,7 @@ def utc_now() -> str:
|
|
|
35
36
|
|
|
36
37
|
def run_stages(
|
|
37
38
|
ctx: RunContext,
|
|
38
|
-
stages:
|
|
39
|
+
stages: Sequence[Stage],
|
|
39
40
|
*,
|
|
40
41
|
observer: StageObserver | None = None,
|
|
41
42
|
) -> None:
|
|
@@ -43,6 +44,7 @@ def run_stages(
|
|
|
43
44
|
all_names = [stage.name for stage in stages]
|
|
44
45
|
|
|
45
46
|
# Only use Rich Progress when no observer (CLI mode)
|
|
47
|
+
progress_ctx: Any
|
|
46
48
|
if observer is None:
|
|
47
49
|
from rich.progress import Progress
|
|
48
50
|
progress_ctx = Progress()
|
|
@@ -49,5 +49,5 @@ class EventStage:
|
|
|
49
49
|
"""Load episode data from the compare stage output."""
|
|
50
50
|
episodes_path = ctx.workspace / condition / "episodes.json"
|
|
51
51
|
if episodes_path.exists():
|
|
52
|
-
return json.loads(episodes_path.read_text())
|
|
52
|
+
return json.loads(episodes_path.read_text()) # type: ignore[no-any-return]
|
|
53
53
|
return []
|
|
@@ -25,7 +25,6 @@ from datetime import datetime, timezone
|
|
|
25
25
|
from pathlib import Path
|
|
26
26
|
from typing import Any
|
|
27
27
|
|
|
28
|
-
|
|
29
28
|
# -- Error Categories (D-07) --
|
|
30
29
|
# These document the 5 error categories from D-07. Used as the error_category
|
|
31
30
|
# argument to save_failed_attempt(). Callers should use these constants rather
|
|
@@ -72,7 +71,7 @@ def read_workspace_state(workspace: Path) -> dict[str, Any]:
|
|
|
72
71
|
"""Read workspace_state.json. Returns default state if missing."""
|
|
73
72
|
state_path = workspace / "workspace_state.json"
|
|
74
73
|
if state_path.exists():
|
|
75
|
-
return json.loads(state_path.read_text())
|
|
74
|
+
return json.loads(state_path.read_text()) # type: ignore[no-any-return]
|
|
76
75
|
return {"case_state": "draft", "attempts": []}
|
|
77
76
|
|
|
78
77
|
|
|
@@ -150,7 +149,7 @@ def load_result(workspace: Path) -> dict[str, Any] | None:
|
|
|
150
149
|
result_path = workspace / "analysis" / "result.json"
|
|
151
150
|
if not result_path.exists():
|
|
152
151
|
return None
|
|
153
|
-
return json.loads(result_path.read_text())
|
|
152
|
+
return json.loads(result_path.read_text()) # type: ignore[no-any-return]
|
|
154
153
|
|
|
155
154
|
|
|
156
155
|
class _NumpyEncoder(json.JSONEncoder):
|