preflight-ml 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ * text=auto
2
+ *.py text eol=lf
3
+ *.toml text eol=lf
4
+ *.yaml text eol=lf
5
+ *.yml text eol=lf
6
+ *.md text eol=lf
@@ -0,0 +1,46 @@
1
+ name: Bug report
2
+ description: Something is broken
3
+ labels: ["bug"]
4
+ body:
5
+ - type: input
6
+ id: preflight-version
7
+ attributes:
8
+ label: preflight version
9
+ placeholder: "0.1.0"
10
+ validations:
11
+ required: true
12
+ - type: input
13
+ id: pytorch-version
14
+ attributes:
15
+ label: PyTorch version
16
+ placeholder: "2.1.0"
17
+ validations:
18
+ required: true
19
+ - type: input
20
+ id: python-version
21
+ attributes:
22
+ label: Python version
23
+ placeholder: "3.11"
24
+ validations:
25
+ required: true
26
+ - type: dropdown
27
+ id: os
28
+ attributes:
29
+ label: OS
30
+ options: [Windows, macOS, Linux]
31
+ validations:
32
+ required: true
33
+ - type: textarea
34
+ id: reproduction
35
+ attributes:
36
+ label: Minimal reproduction
37
+ description: Smallest code that reproduces the issue
38
+ render: python
39
+ validations:
40
+ required: true
41
+ - type: textarea
42
+ id: expected
43
+ attributes:
44
+ label: Expected vs actual output
45
+ validations:
46
+ required: true
@@ -0,0 +1,21 @@
1
+ name: Feature request
2
+ description: Suggest a new check or improvement
3
+ labels: ["enhancement"]
4
+ body:
5
+ - type: textarea
6
+ id: problem
7
+ attributes:
8
+ label: What silent failure would this catch?
9
+ validations:
10
+ required: true
11
+ - type: textarea
12
+ id: solution
13
+ attributes:
14
+ label: Proposed check or feature
15
+ validations:
16
+ required: true
17
+ - type: dropdown
18
+ id: severity
19
+ attributes:
20
+ label: Suggested severity
21
+ options: [FATAL, WARN, INFO]
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.9", "3.10", "3.11"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ pip install torch --index-url https://download.pytorch.org/whl/cpu
27
+ pip install -e ".[dev]"
28
+
29
+ - name: Lint
30
+ run: ruff check .
31
+
32
+ - name: Type check
33
+ run: mypy preflight/ --ignore-missing-imports --no-site-packages
34
+
35
+ - name: Test
36
+ run: pytest
@@ -0,0 +1,28 @@
1
+ name: Release to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - name: Set up Python
15
+ uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.11"
18
+
19
+ - name: Install hatch
20
+ run: pip install hatch
21
+
22
+ - name: Build
23
+ run: hatch build
24
+
25
+ - name: Publish to PyPI
26
+ uses: pypa/gh-action-pypi-publish@release/v1
27
+ with:
28
+ password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,31 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .eggs/
11
+
12
+ # Testing
13
+ .coverage
14
+ .coverage.*
15
+ htmlcov/
16
+ .pytest_cache/
17
+
18
+ # Env
19
+ .env
20
+ .venv
21
+ env/
22
+ venv/
23
+
24
+ # VS Code
25
+ .vscode/
26
+
27
+ # OS
28
+ .DS_Store
29
+ Thumbs.db
30
+
31
+ dist/
@@ -0,0 +1,14 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.3.0
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+ - repo: https://github.com/pre-commit/pre-commit-hooks
9
+ rev: v4.5.0
10
+ hooks:
11
+ - id: trailing-whitespace
12
+ - id: end-of-file-fixer
13
+ - id: check-yaml
14
+ - id: check-toml
@@ -0,0 +1,23 @@
1
+ [checks]
2
+ # set false to disable any check
3
+ nan_inf_detection = true
4
+ normalisation_sanity = true
5
+ channel_ordering = true
6
+ label_leakage = true
7
+ split_sizes = true
8
+ vram_estimation = true
9
+ class_imbalance = true
10
+ shape_mismatch = true
11
+ gradient_check = true
12
+
13
+ [thresholds]
14
+ nan_sample_batches = 10
15
+ norm_sample_batches = 5
16
+ leakage_sample_batches = 20
17
+ imbalance_sample_batches = 10
18
+ imbalance_threshold = 0.1
19
+
20
+ [ignore]
21
+ # example: ignore a specific check with a reason
22
+ # check = "class_imbalance"
23
+ # reason = "intentional: rare event dataset"
@@ -0,0 +1,17 @@
1
+ # Changelog
2
+
3
+ All notable changes to preflight will be documented here.
4
+
5
+ ## [Unreleased]
6
+
7
+ ## [0.1.0] - 2026-03-15
8
+
9
+ ### Added
10
+ - 10 core pre-flight checks: NaN/Inf detection, normalisation sanity,
11
+ channel ordering, label leakage, split sizes, VRAM estimation,
12
+ class imbalance, shape mismatch, gradient check
13
+ - `.preflight.toml` config file support
14
+ - Rich terminal output with severity tiers (FATAL / WARN / INFO)
15
+ - JSON output mode (`--format json`) for CI integration
16
+ - `preflight run` and `preflight checks` CLI commands
17
+ - Fix hints for all failing checks
@@ -0,0 +1,35 @@
1
+ # Contributing to preflight
2
+
3
+ ## Setup
4
+ ```bash
5
+ git clone https://github.com/Rusheel86/preflight.git
6
+ cd preflight
7
+ conda create -n preflight-dev python=3.11 -y
8
+ conda activate preflight-dev
9
+ pip install torch --index-url https://download.pytorch.org/whl/cpu
10
+ pip install -e ".[dev]"
11
+ pre-commit install
12
+ ```
13
+
14
+ ## Running tests
15
+ ```bash
16
+ pytest
17
+ ```
18
+
19
+ ## Adding a new check
20
+
21
+ 1. Add a function to the appropriate file in `preflight/checks/`
22
+ 2. Decorate it with `@register`
23
+ 3. Return a `CheckResult` with the correct severity
24
+ 4. Write two tests: one that passes, one that fails
25
+ 5. Add an entry to `CHANGELOG.md`
26
+
27
+ Every check function must accept `(dataloader, model, loss_fn, config)` as keyword arguments even if it only uses some of them.
28
+
29
+ ## Commit style
30
+
31
+ `fix: correct NaN detection in multi-output dataloaders`
32
+ `feat: add duplicate sample detection check`
33
+ `docs: add example for custom config`
34
+
35
+ Sign all commits: `git commit -s -m "your message"`
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rusheel Sharma
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,224 @@
1
+ Metadata-Version: 2.4
2
+ Name: preflight-ml
3
+ Version: 0.1.0
4
+ Summary: Pre-flight checks for PyTorch pipelines. Catch silent failures before they waste your GPU.
5
+ Project-URL: Homepage, https://github.com/Rusheel86/preflight
6
+ Project-URL: Repository, https://github.com/Rusheel86/preflight
7
+ Project-URL: Issues, https://github.com/Rusheel86/preflight/issues
8
+ Author: Rusheel Sharma
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Rusheel Sharma
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: data-validation,debugging,deep-learning,machine-learning,mlops,pytorch
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: Intended Audience :: Science/Research
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3.9
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
41
+ Requires-Python: >=3.9
42
+ Requires-Dist: click>=8.0
43
+ Requires-Dist: numpy>=1.20
44
+ Requires-Dist: rich>=12.0
45
+ Requires-Dist: torch>=1.9
46
+ Provides-Extra: dev
47
+ Requires-Dist: mypy>=1.0; extra == 'dev'
48
+ Requires-Dist: pre-commit>=3.0; extra == 'dev'
49
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
50
+ Requires-Dist: pytest>=7.0; extra == 'dev'
51
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # preflight
55
+
56
+ > Pre-flight checks for PyTorch pipelines. Catch silent failures before they waste your GPU.
57
+
58
+ [![CI](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml/badge.svg)](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml)
59
+ [![PyPI version](https://img.shields.io/pypi/v/preflight-ml)](https://pypi.org/project/preflight-ml/)
60
+ [![Python](https://img.shields.io/pypi/pyversions/preflight-ml)](https://pypi.org/project/preflight-ml/)
61
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
62
+
63
+ ---
64
+
65
+ Most deep learning bugs don't crash your training loop — they silently produce a garbage model.
66
+ NaNs in your data, labels leaking between train and val, wrong channel ordering, dead gradients.
67
+ You won't know until hours later, after the GPU bill has landed.
68
+
69
+ **preflight** is a pre-training validation tool you run in 30 seconds before starting any training job.
70
+ It's not a linter. It's a pre-flight check — the kind pilots run before the expensive thing takes off.
71
+
72
+ ---
73
+
74
+ ## Install
75
+
76
+ ```bash
77
+ pip install preflight-ml
78
+ ```
79
+
80
+ ## Quickstart
81
+
82
+ Create a small Python file that exposes your dataloader:
83
+
84
+ ```python
85
+ # my_dataloader.py
86
+ import torch
87
+ from torch.utils.data import DataLoader, TensorDataset
88
+
89
+ x = torch.randn(200, 3, 224, 224)
90
+ y = torch.randint(0, 10, (200,))
91
+ dataloader = DataLoader(TensorDataset(x, y), batch_size=32)
92
+ ```
93
+
94
+ Run preflight:
95
+
96
+ ```bash
97
+ preflight run --dataloader my_dataloader.py
98
+ ```
99
+
100
+ Output:
101
+
102
+ ```
103
+ preflight — pre-training check report
104
+ ╭────────────────────────┬──────────┬────────┬──────────────────────────────────────────────────╮
105
+ │ Check │ Severity │ Status │ Message │
106
+ ├────────────────────────┼──────────┼────────┼──────────────────────────────────────────────────┤
107
+ │ nan_inf_detection │ FATAL │ PASS │ No NaN or Inf values found in 10 sampled batches │
108
+ │ normalisation_sanity │ WARN │ PASS │ Normalisation looks reasonable (mean=0.001) │
109
+ │ channel_ordering │ WARN │ PASS │ Channel ordering looks correct (NCHW) │
110
+ │ label_leakage │ FATAL │ PASS │ No val_dataloader provided — skipped │
111
+ │ split_sizes │ INFO │ PASS │ train=200 samples │
112
+ │ vram_estimation │ WARN │ INFO │ No CUDA GPU detected — skipped │
113
+ │ class_imbalance │ WARN │ PASS │ Class distribution looks balanced │
114
+ │ shape_mismatch │ FATAL │ PASS │ No model provided — skipped │
115
+ │ gradient_check │ FATAL │ PASS │ No model+loss provided — skipped │
116
+ ╰────────────────────────┴──────────┴────────┴──────────────────────────────────────────────────╯
117
+
118
+ 0 fatal 0 warnings 9 passed
119
+
120
+ Pre-flight passed. Safe to start training.
121
+ ```
122
+
123
+ ## Checks
124
+
125
+ preflight runs 10 checks across three severity tiers. A **FATAL** failure exits with code 1 and blocks CI.
126
+
127
+ | Check | Severity | What it catches |
128
+ |---|---|---|
129
+ | `nan_inf_detection` | FATAL | NaN or Inf values anywhere in sampled batches |
130
+ | `label_leakage` | FATAL | Samples appearing in both train and val sets |
131
+ | `shape_mismatch` | FATAL | Dataset output shape incompatible with model input |
132
+ | `gradient_check` | FATAL | Zero gradients, dead layers, exploding gradients |
133
+ | `normalisation_sanity` | WARN | Data that looks unnormalised (raw pixel values etc.) |
134
+ | `channel_ordering` | WARN | NHWC tensors when PyTorch expects NCHW |
135
+ | `vram_estimation` | WARN | Estimated peak VRAM exceeds 90% of GPU memory |
136
+ | `class_imbalance` | WARN | Severe class imbalance beyond configurable threshold |
137
+ | `split_sizes` | INFO | Empty or degenerate train/val splits |
138
+ | `duplicate_samples` | INFO | Identical samples within a split |
139
+
140
+ ## With a model
141
+
142
+ Pass a model file to enable shape, gradient, and VRAM checks:
143
+
144
+ ```python
145
+ # my_model.py
146
+ import torch.nn as nn
147
+ model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 224 * 224, 10))
148
+ ```
149
+
150
+ ```python
151
+ # my_loss.py
152
+ import torch.nn as nn
153
+ loss_fn = nn.CrossEntropyLoss()
154
+ ```
155
+
156
+ ```bash
157
+ preflight run \
158
+ --dataloader my_dataloader.py \
159
+ --model my_model.py \
160
+ --loss my_loss.py \
161
+ --val-dataloader my_val_dataloader.py
162
+ ```
163
+
164
+ ## Configuration
165
+
166
+ Add a `.preflight.toml` to your repo root to configure thresholds and disable checks:
167
+
168
+ ```toml
169
+ [thresholds]
170
+ imbalance_threshold = 0.05
171
+ nan_sample_batches = 20
172
+
173
+ [checks]
174
+ vram_estimation = false
175
+
176
+ [ignore]
177
+ # check = "class_imbalance"
178
+ # reason = "intentional: rare event dataset"
179
+ ```
180
+
181
+ ## CI integration
182
+
183
+ Add to your GitHub Actions workflow:
184
+
185
+ ```yaml
186
+ - name: Install preflight
187
+ run: pip install preflight-ml
188
+
189
+ - name: Run pre-flight checks
190
+ run: preflight run --dataloader scripts/dataloader.py --format json
191
+ ```
192
+
193
+ The `--format json` flag outputs machine-readable results. Exit code is `1` if any FATAL check fails, `0` otherwise.
194
+
195
+ ## List all checks
196
+
197
+ ```bash
198
+ preflight checks
199
+ ```
200
+
201
+ ## What preflight does NOT do
202
+
203
+ - It does not replace unit tests. Use pytest for code logic.
204
+ - It does not guarantee a correct model. Passing preflight is a minimum safety bar, not a certification.
205
+ - It does not run your full training loop. Use it as a gate before training starts.
206
+ - It does not modify your code. (An opt-in `--fix` flag is planned — see Roadmap.)
207
+
208
+ ## Roadmap
209
+
210
+ - [ ] `--fix` flag — auto-patch common issues (channel ordering, normalisation)
211
+ - [ ] Dataset snapshot + drift detection (`preflight diff baseline.json new_data.pt`)
212
+ - [ ] Full dry-run mode (one batch through model + loss + backward)
213
+ - [ ] Jupyter magic command (`%load_ext preflight`)
214
+ - [ ] `preflight-monai` plugin for medical imaging checks
215
+ - [ ] `preflight-sktime` plugin for time series checks
216
+
217
+ ## Contributing
218
+
219
+ See [CONTRIBUTING.md](CONTRIBUTING.md). New checks are welcome — each one needs a passing test,
220
+ a failing test, and a fix hint.
221
+
222
+ ## License
223
+
224
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,171 @@
1
+ # preflight
2
+
3
+ > Pre-flight checks for PyTorch pipelines. Catch silent failures before they waste your GPU.
4
+
5
+ [![CI](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml/badge.svg)](https://github.com/Rusheel86/preflight/actions/workflows/ci.yml)
6
+ [![PyPI version](https://img.shields.io/pypi/v/preflight-ml)](https://pypi.org/project/preflight-ml/)
7
+ [![Python](https://img.shields.io/pypi/pyversions/preflight-ml)](https://pypi.org/project/preflight-ml/)
8
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
9
+
10
+ ---
11
+
12
+ Most deep learning bugs don't crash your training loop — they silently produce a garbage model.
13
+ NaNs in your data, labels leaking between train and val, wrong channel ordering, dead gradients.
14
+ You won't know until hours later, after the GPU bill has landed.
15
+
16
+ **preflight** is a pre-training validation tool you run in 30 seconds before starting any training job.
17
+ It's not a linter. It's a pre-flight check — the kind pilots run before the expensive thing takes off.
18
+
19
+ ---
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pip install preflight-ml
25
+ ```
26
+
27
+ ## Quickstart
28
+
29
+ Create a small Python file that exposes your dataloader:
30
+
31
+ ```python
32
+ # my_dataloader.py
33
+ import torch
34
+ from torch.utils.data import DataLoader, TensorDataset
35
+
36
+ x = torch.randn(200, 3, 224, 224)
37
+ y = torch.randint(0, 10, (200,))
38
+ dataloader = DataLoader(TensorDataset(x, y), batch_size=32)
39
+ ```
40
+
41
+ Run preflight:
42
+
43
+ ```bash
44
+ preflight run --dataloader my_dataloader.py
45
+ ```
46
+
47
+ Output:
48
+
49
+ ```
50
+ preflight — pre-training check report
51
+ ╭────────────────────────┬──────────┬────────┬──────────────────────────────────────────────────╮
52
+ │ Check │ Severity │ Status │ Message │
53
+ ├────────────────────────┼──────────┼────────┼──────────────────────────────────────────────────┤
54
+ │ nan_inf_detection │ FATAL │ PASS │ No NaN or Inf values found in 10 sampled batches │
55
+ │ normalisation_sanity │ WARN │ PASS │ Normalisation looks reasonable (mean=0.001) │
56
+ │ channel_ordering │ WARN │ PASS │ Channel ordering looks correct (NCHW) │
57
+ │ label_leakage │ FATAL │ PASS │ No val_dataloader provided — skipped │
58
+ │ split_sizes │ INFO │ PASS │ train=200 samples │
59
+ │ vram_estimation │ WARN │ INFO │ No CUDA GPU detected — skipped │
60
+ │ class_imbalance │ WARN │ PASS │ Class distribution looks balanced │
61
+ │ shape_mismatch │ FATAL │ PASS │ No model provided — skipped │
62
+ │ gradient_check │ FATAL │ PASS │ No model+loss provided — skipped │
63
+ ╰────────────────────────┴──────────┴────────┴──────────────────────────────────────────────────╯
64
+
65
+ 0 fatal 0 warnings 9 passed
66
+
67
+ Pre-flight passed. Safe to start training.
68
+ ```
69
+
70
+ ## Checks
71
+
72
+ preflight runs 10 checks across three severity tiers. A **FATAL** failure exits with code 1 and blocks CI.
73
+
74
+ | Check | Severity | What it catches |
75
+ |---|---|---|
76
+ | `nan_inf_detection` | FATAL | NaN or Inf values anywhere in sampled batches |
77
+ | `label_leakage` | FATAL | Samples appearing in both train and val sets |
78
+ | `shape_mismatch` | FATAL | Dataset output shape incompatible with model input |
79
+ | `gradient_check` | FATAL | Zero gradients, dead layers, exploding gradients |
80
+ | `normalisation_sanity` | WARN | Data that looks unnormalised (raw pixel values etc.) |
81
+ | `channel_ordering` | WARN | NHWC tensors when PyTorch expects NCHW |
82
+ | `vram_estimation` | WARN | Estimated peak VRAM exceeds 90% of GPU memory |
83
+ | `class_imbalance` | WARN | Severe class imbalance beyond configurable threshold |
84
+ | `split_sizes` | INFO | Empty or degenerate train/val splits |
85
+ | `duplicate_samples` | INFO | Identical samples within a split |
86
+
87
+ ## With a model
88
+
89
+ Pass a model file to enable shape, gradient, and VRAM checks:
90
+
91
+ ```python
92
+ # my_model.py
93
+ import torch.nn as nn
94
+ model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 224 * 224, 10))
95
+ ```
96
+
97
+ ```python
98
+ # my_loss.py
99
+ import torch.nn as nn
100
+ loss_fn = nn.CrossEntropyLoss()
101
+ ```
102
+
103
+ ```bash
104
+ preflight run \
105
+ --dataloader my_dataloader.py \
106
+ --model my_model.py \
107
+ --loss my_loss.py \
108
+ --val-dataloader my_val_dataloader.py
109
+ ```
110
+
111
+ ## Configuration
112
+
113
+ Add a `.preflight.toml` to your repo root to configure thresholds and disable checks:
114
+
115
+ ```toml
116
+ [thresholds]
117
+ imbalance_threshold = 0.05
118
+ nan_sample_batches = 20
119
+
120
+ [checks]
121
+ vram_estimation = false
122
+
123
+ [ignore]
124
+ # check = "class_imbalance"
125
+ # reason = "intentional: rare event dataset"
126
+ ```
127
+
128
+ ## CI integration
129
+
130
+ Add to your GitHub Actions workflow:
131
+
132
+ ```yaml
133
+ - name: Install preflight
134
+ run: pip install preflight-ml
135
+
136
+ - name: Run pre-flight checks
137
+ run: preflight run --dataloader scripts/dataloader.py --format json
138
+ ```
139
+
140
+ The `--format json` flag outputs machine-readable results. Exit code is `1` if any FATAL check fails, `0` otherwise.
141
+
142
+ ## List all checks
143
+
144
+ ```bash
145
+ preflight checks
146
+ ```
147
+
148
+ ## What preflight does NOT do
149
+
150
+ - It does not replace unit tests. Use pytest for code logic.
151
+ - It does not guarantee a correct model. Passing preflight is a minimum safety bar, not a certification.
152
+ - It does not run your full training loop. Use it as a gate before training starts.
153
+ - It does not modify your code unless you pass `--fix`.
154
+
155
+ ## Roadmap
156
+
157
+ - [ ] `--fix` flag — auto-patch common issues (channel ordering, normalisation)
158
+ - [ ] Dataset snapshot + drift detection (`preflight diff baseline.json new_data.pt`)
159
+ - [ ] Full dry-run mode (one batch through model + loss + backward)
160
+ - [ ] Jupyter magic command (`%load_ext preflight`)
161
+ - [ ] `preflight-monai` plugin for medical imaging checks
162
+ - [ ] `preflight-sktime` plugin for time series checks
163
+
164
+ ## Contributing
165
+
166
+ See [CONTRIBUTING.md](CONTRIBUTING.md). New checks are welcome — each one needs a passing test,
167
+ a failing test, and a fix hint.
168
+
169
+ ## License
170
+
171
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,7 @@
1
+ """preflight — pre-flight checks for PyTorch pipelines."""
2
+
3
+ from preflight.registry import CheckResult, Registry, Severity
4
+ from preflight.runner import run_checks
5
+
6
+ __version__ = "0.1.0"
7
+ __all__ = ["Registry", "CheckResult", "Severity", "run_checks"]