preflight-ml 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- preflight_ml-0.1.0/.gitattributes +6 -0
- preflight_ml-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +46 -0
- preflight_ml-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +21 -0
- preflight_ml-0.1.0/.github/workflows/ci.yml +36 -0
- preflight_ml-0.1.0/.github/workflows/release.yml +28 -0
- preflight_ml-0.1.0/.gitignore +31 -0
- preflight_ml-0.1.0/.pre-commit-config.yaml +14 -0
- preflight_ml-0.1.0/.preflight.toml +23 -0
- preflight_ml-0.1.0/CHANGELOG.md +17 -0
- preflight_ml-0.1.0/CONTRIBUTING.md +35 -0
- preflight_ml-0.1.0/LICENSE +21 -0
- preflight_ml-0.1.0/PKG-INFO +224 -0
- preflight_ml-0.1.0/README.md +171 -0
- preflight_ml-0.1.0/preflight/__init__.py +7 -0
- preflight_ml-0.1.0/preflight/checks/__init__.py +1 -0
- preflight_ml-0.1.0/preflight/checks/data.py +182 -0
- preflight_ml-0.1.0/preflight/checks/model.py +145 -0
- preflight_ml-0.1.0/preflight/checks/resources.py +146 -0
- preflight_ml-0.1.0/preflight/checks/splits.py +134 -0
- preflight_ml-0.1.0/preflight/cli.py +129 -0
- preflight_ml-0.1.0/preflight/config.py +20 -0
- preflight_ml-0.1.0/preflight/registry.py +41 -0
- preflight_ml-0.1.0/preflight/reporter.py +92 -0
- preflight_ml-0.1.0/preflight/runner.py +50 -0
- preflight_ml-0.1.0/pyproject.toml +69 -0
- preflight_ml-0.1.0/tests/__init__.py +0 -0
- preflight_ml-0.1.0/tests/conftest.py +45 -0
- preflight_ml-0.1.0/tests/test_data.py +32 -0
- preflight_ml-0.1.0/tests/test_model.py +28 -0
- preflight_ml-0.1.0/tests/test_resources.py +19 -0
- preflight_ml-0.1.0/tests/test_splits.py +28 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: Bug report
|
|
2
|
+
description: Something is broken
|
|
3
|
+
labels: ["bug"]
|
|
4
|
+
body:
|
|
5
|
+
- type: input
|
|
6
|
+
id: preflight-version
|
|
7
|
+
attributes:
|
|
8
|
+
label: preflight version
|
|
9
|
+
placeholder: "0.1.0"
|
|
10
|
+
validations:
|
|
11
|
+
required: true
|
|
12
|
+
- type: input
|
|
13
|
+
id: pytorch-version
|
|
14
|
+
attributes:
|
|
15
|
+
label: PyTorch version
|
|
16
|
+
placeholder: "2.1.0"
|
|
17
|
+
validations:
|
|
18
|
+
required: true
|
|
19
|
+
- type: input
|
|
20
|
+
id: python-version
|
|
21
|
+
attributes:
|
|
22
|
+
label: Python version
|
|
23
|
+
placeholder: "3.11"
|
|
24
|
+
validations:
|
|
25
|
+
required: true
|
|
26
|
+
- type: dropdown
|
|
27
|
+
id: os
|
|
28
|
+
attributes:
|
|
29
|
+
label: OS
|
|
30
|
+
options: [Windows, macOS, Linux]
|
|
31
|
+
validations:
|
|
32
|
+
required: true
|
|
33
|
+
- type: textarea
|
|
34
|
+
id: reproduction
|
|
35
|
+
attributes:
|
|
36
|
+
label: Minimal reproduction
|
|
37
|
+
description: Smallest code that reproduces the issue
|
|
38
|
+
render: python
|
|
39
|
+
validations:
|
|
40
|
+
required: true
|
|
41
|
+
- type: textarea
|
|
42
|
+
id: expected
|
|
43
|
+
attributes:
|
|
44
|
+
label: Expected vs actual output
|
|
45
|
+
validations:
|
|
46
|
+
required: true
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
name: Feature request
|
|
2
|
+
description: Suggest a new check or improvement
|
|
3
|
+
labels: ["enhancement"]
|
|
4
|
+
body:
|
|
5
|
+
- type: textarea
|
|
6
|
+
id: problem
|
|
7
|
+
attributes:
|
|
8
|
+
label: What silent failure would this catch?
|
|
9
|
+
validations:
|
|
10
|
+
required: true
|
|
11
|
+
- type: textarea
|
|
12
|
+
id: solution
|
|
13
|
+
attributes:
|
|
14
|
+
label: Proposed check or feature
|
|
15
|
+
validations:
|
|
16
|
+
required: true
|
|
17
|
+
- type: dropdown
|
|
18
|
+
id: severity
|
|
19
|
+
attributes:
|
|
20
|
+
label: Suggested severity
|
|
21
|
+
options: [FATAL, WARN, INFO]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.9", "3.10", "3.11"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
pip install torch --index-url https://download.pytorch.org/whl/cpu
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
|
|
29
|
+
- name: Lint
|
|
30
|
+
run: ruff check .
|
|
31
|
+
|
|
32
|
+
- name: Type check
|
|
33
|
+
run: mypy preflight/ --ignore-missing-imports --no-site-packages
|
|
34
|
+
|
|
35
|
+
- name: Test
|
|
36
|
+
run: pytest
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
name: Release to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Set up Python
|
|
15
|
+
uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.11"
|
|
18
|
+
|
|
19
|
+
- name: Install hatch
|
|
20
|
+
run: pip install hatch
|
|
21
|
+
|
|
22
|
+
- name: Build
|
|
23
|
+
run: hatch build
|
|
24
|
+
|
|
25
|
+
- name: Publish to PyPI
|
|
26
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
27
|
+
with:
|
|
28
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
.Python
|
|
7
|
+
*.egg-info/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
.eggs/
|
|
11
|
+
|
|
12
|
+
# Testing
|
|
13
|
+
.coverage
|
|
14
|
+
.coverage.*
|
|
15
|
+
htmlcov/
|
|
16
|
+
.pytest_cache/
|
|
17
|
+
|
|
18
|
+
# Env
|
|
19
|
+
.env
|
|
20
|
+
.venv
|
|
21
|
+
env/
|
|
22
|
+
venv/
|
|
23
|
+
|
|
24
|
+
# VS Code
|
|
25
|
+
.vscode/
|
|
26
|
+
|
|
27
|
+
# OS
|
|
28
|
+
.DS_Store
|
|
29
|
+
Thumbs.db
|
|
30
|
+
|
|
31
|
+
dist/
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.3.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
9
|
+
rev: v4.5.0
|
|
10
|
+
hooks:
|
|
11
|
+
- id: trailing-whitespace
|
|
12
|
+
- id: end-of-file-fixer
|
|
13
|
+
- id: check-yaml
|
|
14
|
+
- id: check-toml
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[checks]
|
|
2
|
+
# set false to disable any check
|
|
3
|
+
nan_inf_detection = true
|
|
4
|
+
normalisation_sanity = true
|
|
5
|
+
channel_ordering = true
|
|
6
|
+
label_leakage = true
|
|
7
|
+
split_sizes = true
|
|
8
|
+
vram_estimation = true
|
|
9
|
+
class_imbalance = true
|
|
10
|
+
shape_mismatch = true
|
|
11
|
+
gradient_check = true
|
|
12
|
+
|
|
13
|
+
[thresholds]
|
|
14
|
+
nan_sample_batches = 10
|
|
15
|
+
norm_sample_batches = 5
|
|
16
|
+
leakage_sample_batches = 20
|
|
17
|
+
imbalance_sample_batches = 10
|
|
18
|
+
imbalance_threshold = 0.1
|
|
19
|
+
|
|
20
|
+
[ignore]
|
|
21
|
+
# example: ignore a specific check with a reason
|
|
22
|
+
# check = "class_imbalance"
|
|
23
|
+
# reason = "intentional: rare event dataset"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to preflight will be documented here.
|
|
4
|
+
|
|
5
|
+
## [Unreleased]
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - 2026-03-15
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
- 10 core pre-flight checks: NaN/Inf detection, normalisation sanity,
|
|
11
|
+
channel ordering, label leakage, split sizes, VRAM estimation,
|
|
12
|
+
class imbalance, shape mismatch, gradient check, duplicate samples
|
|
13
|
+
- `.preflight.toml` config file support
|
|
14
|
+
- Rich terminal output with severity tiers (FATAL / WARN / INFO)
|
|
15
|
+
- JSON output mode (`--format json`) for CI integration
|
|
16
|
+
- `preflight run` and `preflight checks` CLI commands
|
|
17
|
+
- Fix hints for all failing checks
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Contributing to preflight
|
|
2
|
+
|
|
3
|
+
## Setup
|
|
4
|
+
```bash
|
|
5
|
+
git clone https://github.com/Rusheel86/preflight.git
|
|
6
|
+
cd preflight
|
|
7
|
+
conda create -n preflight-dev python=3.11 -y
|
|
8
|
+
conda activate preflight-dev
|
|
9
|
+
pip install torch --index-url https://download.pytorch.org/whl/cpu
|
|
10
|
+
pip install -e ".[dev]"
|
|
11
|
+
pre-commit install
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Running tests
|
|
15
|
+
```bash
|
|
16
|
+
pytest
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Adding a new check
|
|
20
|
+
|
|
21
|
+
1. Add a function to the appropriate file in `preflight/checks/`
|
|
22
|
+
2. Decorate it with `@register`
|
|
23
|
+
3. Return a `CheckResult` with the correct severity
|
|
24
|
+
4. Write two tests: one that passes, one that fails
|
|
25
|
+
5. Add an entry to `CHANGELOG.md`
|
|
26
|
+
|
|
27
|
+
Every check function must accept `(dataloader, model, loss_fn, config)` as keyword arguments even if it only uses some of them.
|
|
28
|
+
|
|
29
|
+
## Commit style
|
|
30
|
+
|
|
31
|
+
`fix: correct NaN detection in multi-output dataloaders`
|
|
32
|
+
`feat: add duplicate sample detection check`
|
|
33
|
+
`docs: add example for custom config`
|
|
34
|
+
|
|
35
|
+
Sign all commits: `git commit -s -m "your message"`
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rusheel Sharma
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: preflight-ml
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pre-flight checks for PyTorch pipelines. Catch silent failures before they waste your GPU.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Rusheel86/preflight
|
|
6
|
+
Project-URL: Repository, https://github.com/Rusheel86/preflight
|
|
7
|
+
Project-URL: Issues, https://github.com/Rusheel86/preflight/issues
|
|
8
|
+
Author: Rusheel Sharma
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 Rusheel Sharma
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: data-validation,debugging,deep-learning,machine-learning,mlops,pytorch
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: Intended Audience :: Science/Research
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
41
|
+
Requires-Python: >=3.9
|
|
42
|
+
Requires-Dist: click>=8.0
|
|
43
|
+
Requires-Dist: numpy>=1.20
|
|
44
|
+
Requires-Dist: rich>=12.0
|
|
45
|
+
Requires-Dist: torch>=1.9
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
48
|
+
Requires-Dist: pre-commit>=3.0; extra == 'dev'
|
|
49
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
50
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
51
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
52
|
+
Description-Content-Type: text/markdown
|
|
53
|
+
|
|
54
|
+
# preflight
|
|
55
|
+
|
|
56
|
+
> Pre-flight checks for PyTorch pipelines. Catch silent failures before they waste your GPU.
|
|
57
|
+
|
|
58
|
+
[![CI](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml/badge.svg)](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml)
|
|
59
|
+
[![PyPI version](https://img.shields.io/pypi/v/preflight-ml)](https://pypi.org/project/preflight-ml/)
|
|
60
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/preflight-ml)](https://pypi.org/project/preflight-ml/)
|
|
61
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
Most deep learning bugs don't crash your training loop — they silently produce a garbage model.
|
|
66
|
+
NaNs in your data, labels leaking between train and val, wrong channel ordering, dead gradients.
|
|
67
|
+
You won't know until hours later, after the GPU bill has landed.
|
|
68
|
+
|
|
69
|
+
**preflight** is a pre-training validation tool you run in 30 seconds before starting any training job.
|
|
70
|
+
It's not a linter. It's a pre-flight check — the kind pilots run before the expensive thing takes off.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Install
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install preflight-ml
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Quickstart
|
|
81
|
+
|
|
82
|
+
Create a small Python file that exposes your dataloader:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
# my_dataloader.py
|
|
86
|
+
import torch
|
|
87
|
+
from torch.utils.data import DataLoader, TensorDataset
|
|
88
|
+
|
|
89
|
+
x = torch.randn(200, 3, 224, 224)
|
|
90
|
+
y = torch.randint(0, 10, (200,))
|
|
91
|
+
dataloader = DataLoader(TensorDataset(x, y), batch_size=32)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Run preflight:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
preflight run --dataloader my_dataloader.py
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Output:
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
preflight — pre-training check report
|
|
104
|
+
╭────────────────────────┬──────────┬────────┬──────────────────────────────────────────────────╮
|
|
105
|
+
│ Check │ Severity │ Status │ Message │
|
|
106
|
+
├────────────────────────┼──────────┼────────┼──────────────────────────────────────────────────┤
|
|
107
|
+
│ nan_inf_detection │ FATAL │ PASS │ No NaN or Inf values found in 10 sampled batches │
|
|
108
|
+
│ normalisation_sanity │ WARN │ PASS │ Normalisation looks reasonable (mean=0.001) │
|
|
109
|
+
│ channel_ordering │ WARN │ PASS │ Channel ordering looks correct (NCHW) │
|
|
110
|
+
│ label_leakage │ FATAL │ PASS │ No val_dataloader provided — skipped │
|
|
111
|
+
│ split_sizes │ INFO │ PASS │ train=200 samples │
|
|
112
|
+
│ vram_estimation │ WARN │ INFO │ No CUDA GPU detected — skipped │
|
|
113
|
+
│ class_imbalance │ WARN │ PASS │ Class distribution looks balanced │
|
|
114
|
+
│ shape_mismatch │ FATAL │ PASS │ No model provided — skipped │
|
|
115
|
+
│ gradient_check │ FATAL │ PASS │ No model+loss provided — skipped │
|
|
116
|
+
╰────────────────────────┴──────────┴────────┴──────────────────────────────────────────────────╯
|
|
117
|
+
|
|
118
|
+
0 fatal 0 warnings 9 passed
|
|
119
|
+
|
|
120
|
+
Pre-flight passed. Safe to start training.
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Checks
|
|
124
|
+
|
|
125
|
+
preflight runs 10 checks across three severity tiers. A **FATAL** failure exits with code 1 and blocks CI.
|
|
126
|
+
|
|
127
|
+
| Check | Severity | What it catches |
|
|
128
|
+
|---|---|---|
|
|
129
|
+
| `nan_inf_detection` | FATAL | NaN or Inf values anywhere in sampled batches |
|
|
130
|
+
| `label_leakage` | FATAL | Samples appearing in both train and val sets |
|
|
131
|
+
| `shape_mismatch` | FATAL | Dataset output shape incompatible with model input |
|
|
132
|
+
| `gradient_check` | FATAL | Zero gradients, dead layers, exploding gradients |
|
|
133
|
+
| `normalisation_sanity` | WARN | Data that looks unnormalised (raw pixel values etc.) |
|
|
134
|
+
| `channel_ordering` | WARN | NHWC tensors when PyTorch expects NCHW |
|
|
135
|
+
| `vram_estimation` | WARN | Estimated peak VRAM exceeds 90% of GPU memory |
|
|
136
|
+
| `class_imbalance` | WARN | Severe class imbalance beyond configurable threshold |
|
|
137
|
+
| `split_sizes` | INFO | Empty or degenerate train/val splits |
|
|
138
|
+
| `duplicate_samples` | INFO | Identical samples within a split |
|
|
139
|
+
|
|
140
|
+
## With a model
|
|
141
|
+
|
|
142
|
+
Pass a model file to enable shape, gradient, and VRAM checks:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
# my_model.py
|
|
146
|
+
import torch.nn as nn
|
|
147
|
+
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 224 * 224, 10))
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
# my_loss.py
|
|
152
|
+
import torch.nn as nn
|
|
153
|
+
loss_fn = nn.CrossEntropyLoss()
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
preflight run \
|
|
158
|
+
--dataloader my_dataloader.py \
|
|
159
|
+
--model my_model.py \
|
|
160
|
+
--loss my_loss.py \
|
|
161
|
+
--val-dataloader my_val_dataloader.py
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## Configuration
|
|
165
|
+
|
|
166
|
+
Add a `.preflight.toml` to your repo root to configure thresholds and disable checks:
|
|
167
|
+
|
|
168
|
+
```toml
|
|
169
|
+
[thresholds]
|
|
170
|
+
imbalance_threshold = 0.05
|
|
171
|
+
nan_sample_batches = 20
|
|
172
|
+
|
|
173
|
+
[checks]
|
|
174
|
+
vram_estimation = false
|
|
175
|
+
|
|
176
|
+
[ignore]
|
|
177
|
+
# check = "class_imbalance"
|
|
178
|
+
# reason = "intentional: rare event dataset"
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## CI integration
|
|
182
|
+
|
|
183
|
+
Add to your GitHub Actions workflow:
|
|
184
|
+
|
|
185
|
+
```yaml
|
|
186
|
+
- name: Install preflight
|
|
187
|
+
run: pip install preflight-ml
|
|
188
|
+
|
|
189
|
+
- name: Run pre-flight checks
|
|
190
|
+
run: preflight run --dataloader scripts/dataloader.py --format json
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
The `--format json` flag outputs machine-readable results. Exit code is `1` if any FATAL check fails, `0` otherwise.
|
|
194
|
+
|
|
195
|
+
## List all checks
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
preflight checks
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## What preflight does NOT do
|
|
202
|
+
|
|
203
|
+
- It does not replace unit tests. Use pytest for code logic.
|
|
204
|
+
- It does not guarantee a correct model. Passing preflight is a minimum safety bar, not a certification.
|
|
205
|
+
- It does not run your full training loop. Use it as a gate before training starts.
|
|
206
|
+
- It does not modify your code. The `--fix` flag listed in the roadmap is not implemented yet.
|
|
207
|
+
|
|
208
|
+
## Roadmap
|
|
209
|
+
|
|
210
|
+
- [ ] `--fix` flag — auto-patch common issues (channel ordering, normalisation)
|
|
211
|
+
- [ ] Dataset snapshot + drift detection (`preflight diff baseline.json new_data.pt`)
|
|
212
|
+
- [ ] Full dry-run mode (one batch through model + loss + backward)
|
|
213
|
+
- [ ] Jupyter magic command (`%load_ext preflight`)
|
|
214
|
+
- [ ] `preflight-monai` plugin for medical imaging checks
|
|
215
|
+
- [ ] `preflight-sktime` plugin for time series checks
|
|
216
|
+
|
|
217
|
+
## Contributing
|
|
218
|
+
|
|
219
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). New checks are welcome — each one needs a passing test,
|
|
220
|
+
a failing test, and a fix hint.
|
|
221
|
+
|
|
222
|
+
## License
|
|
223
|
+
|
|
224
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# preflight
|
|
2
|
+
|
|
3
|
+
> Pre-flight checks for PyTorch pipelines. Catch silent failures before they waste your GPU.
|
|
4
|
+
|
|
5
|
+
[![CI](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml/badge.svg)](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml)
|
|
6
|
+
[![PyPI version](https://img.shields.io/pypi/v/preflight-ml)](https://pypi.org/project/preflight-ml/)
|
|
7
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/preflight-ml)](https://pypi.org/project/preflight-ml/)
|
|
8
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
Most deep learning bugs don't crash your training loop — they silently produce a garbage model.
|
|
13
|
+
NaNs in your data, labels leaking between train and val, wrong channel ordering, dead gradients.
|
|
14
|
+
You won't know until hours later, after the GPU bill has landed.
|
|
15
|
+
|
|
16
|
+
**preflight** is a pre-training validation tool you run in 30 seconds before starting any training job.
|
|
17
|
+
It's not a linter. It's a pre-flight check — the kind pilots run before the expensive thing takes off.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install preflight-ml
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Quickstart
|
|
28
|
+
|
|
29
|
+
Create a small Python file that exposes your dataloader:
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
# my_dataloader.py
|
|
33
|
+
import torch
|
|
34
|
+
from torch.utils.data import DataLoader, TensorDataset
|
|
35
|
+
|
|
36
|
+
x = torch.randn(200, 3, 224, 224)
|
|
37
|
+
y = torch.randint(0, 10, (200,))
|
|
38
|
+
dataloader = DataLoader(TensorDataset(x, y), batch_size=32)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Run preflight:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
preflight run --dataloader my_dataloader.py
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Output:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
preflight — pre-training check report
|
|
51
|
+
╭────────────────────────┬──────────┬────────┬──────────────────────────────────────────────────╮
|
|
52
|
+
│ Check │ Severity │ Status │ Message │
|
|
53
|
+
├────────────────────────┼──────────┼────────┼──────────────────────────────────────────────────┤
|
|
54
|
+
│ nan_inf_detection │ FATAL │ PASS │ No NaN or Inf values found in 10 sampled batches │
|
|
55
|
+
│ normalisation_sanity │ WARN │ PASS │ Normalisation looks reasonable (mean=0.001) │
|
|
56
|
+
│ channel_ordering │ WARN │ PASS │ Channel ordering looks correct (NCHW) │
|
|
57
|
+
│ label_leakage │ FATAL │ PASS │ No val_dataloader provided — skipped │
|
|
58
|
+
│ split_sizes │ INFO │ PASS │ train=200 samples │
|
|
59
|
+
│ vram_estimation │ WARN │ INFO │ No CUDA GPU detected — skipped │
|
|
60
|
+
│ class_imbalance │ WARN │ PASS │ Class distribution looks balanced │
|
|
61
|
+
│ shape_mismatch │ FATAL │ PASS │ No model provided — skipped │
|
|
62
|
+
│ gradient_check │ FATAL │ PASS │ No model+loss provided — skipped │
|
|
63
|
+
╰────────────────────────┴──────────┴────────┴──────────────────────────────────────────────────╯
|
|
64
|
+
|
|
65
|
+
0 fatal 0 warnings 9 passed
|
|
66
|
+
|
|
67
|
+
Pre-flight passed. Safe to start training.
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Checks
|
|
71
|
+
|
|
72
|
+
preflight runs 10 checks across three severity tiers. A **FATAL** failure exits with code 1 and blocks CI.
|
|
73
|
+
|
|
74
|
+
| Check | Severity | What it catches |
|
|
75
|
+
|---|---|---|
|
|
76
|
+
| `nan_inf_detection` | FATAL | NaN or Inf values anywhere in sampled batches |
|
|
77
|
+
| `label_leakage` | FATAL | Samples appearing in both train and val sets |
|
|
78
|
+
| `shape_mismatch` | FATAL | Dataset output shape incompatible with model input |
|
|
79
|
+
| `gradient_check` | FATAL | Zero gradients, dead layers, exploding gradients |
|
|
80
|
+
| `normalisation_sanity` | WARN | Data that looks unnormalised (raw pixel values etc.) |
|
|
81
|
+
| `channel_ordering` | WARN | NHWC tensors when PyTorch expects NCHW |
|
|
82
|
+
| `vram_estimation` | WARN | Estimated peak VRAM exceeds 90% of GPU memory |
|
|
83
|
+
| `class_imbalance` | WARN | Severe class imbalance beyond configurable threshold |
|
|
84
|
+
| `split_sizes` | INFO | Empty or degenerate train/val splits |
|
|
85
|
+
| `duplicate_samples` | INFO | Identical samples within a split |
|
|
86
|
+
|
|
87
|
+
## With a model
|
|
88
|
+
|
|
89
|
+
Pass a model file to enable shape, gradient, and VRAM checks:
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
# my_model.py
|
|
93
|
+
import torch.nn as nn
|
|
94
|
+
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 224 * 224, 10))
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
# my_loss.py
|
|
99
|
+
import torch.nn as nn
|
|
100
|
+
loss_fn = nn.CrossEntropyLoss()
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
preflight run \
|
|
105
|
+
--dataloader my_dataloader.py \
|
|
106
|
+
--model my_model.py \
|
|
107
|
+
--loss my_loss.py \
|
|
108
|
+
--val-dataloader my_val_dataloader.py
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Configuration
|
|
112
|
+
|
|
113
|
+
Add a `.preflight.toml` to your repo root to configure thresholds and disable checks:
|
|
114
|
+
|
|
115
|
+
```toml
|
|
116
|
+
[thresholds]
|
|
117
|
+
imbalance_threshold = 0.05
|
|
118
|
+
nan_sample_batches = 20
|
|
119
|
+
|
|
120
|
+
[checks]
|
|
121
|
+
vram_estimation = false
|
|
122
|
+
|
|
123
|
+
[ignore]
|
|
124
|
+
# check = "class_imbalance"
|
|
125
|
+
# reason = "intentional: rare event dataset"
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## CI integration
|
|
129
|
+
|
|
130
|
+
Add to your GitHub Actions workflow:
|
|
131
|
+
|
|
132
|
+
```yaml
|
|
133
|
+
- name: Install preflight
|
|
134
|
+
run: pip install preflight-ml
|
|
135
|
+
|
|
136
|
+
- name: Run pre-flight checks
|
|
137
|
+
run: preflight run --dataloader scripts/dataloader.py --format json
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
The `--format json` flag outputs machine-readable results. Exit code is `1` if any FATAL check fails, `0` otherwise.
|
|
141
|
+
|
|
142
|
+
## List all checks
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
preflight checks
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## What preflight does NOT do
|
|
149
|
+
|
|
150
|
+
- It does not replace unit tests. Use pytest for code logic.
|
|
151
|
+
- It does not guarantee a correct model. Passing preflight is a minimum safety bar, not a certification.
|
|
152
|
+
- It does not run your full training loop. Use it as a gate before training starts.
|
|
153
|
+
- It does not modify your code. The `--fix` flag listed in the roadmap is not implemented yet.
|
|
154
|
+
|
|
155
|
+
## Roadmap
|
|
156
|
+
|
|
157
|
+
- [ ] `--fix` flag — auto-patch common issues (channel ordering, normalisation)
|
|
158
|
+
- [ ] Dataset snapshot + drift detection (`preflight diff baseline.json new_data.pt`)
|
|
159
|
+
- [ ] Full dry-run mode (one batch through model + loss + backward)
|
|
160
|
+
- [ ] Jupyter magic command (`%load_ext preflight`)
|
|
161
|
+
- [ ] `preflight-monai` plugin for medical imaging checks
|
|
162
|
+
- [ ] `preflight-sktime` plugin for time series checks
|
|
163
|
+
|
|
164
|
+
## Contributing
|
|
165
|
+
|
|
166
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). New checks are welcome — each one needs a passing test,
|
|
167
|
+
a failing test, and a fix hint.
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
MIT — see [LICENSE](LICENSE).
|