flashseg 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flashseg-1.0.0/LICENSE +21 -0
- flashseg-1.0.0/PKG-INFO +307 -0
- flashseg-1.0.0/README.md +256 -0
- flashseg-1.0.0/flashseg/__init__.py +20 -0
- flashseg-1.0.0/flashseg/analytics/__init__.py +6 -0
- flashseg-1.0.0/flashseg/analytics/benchmark.py +65 -0
- flashseg-1.0.0/flashseg/analytics/profiler.py +42 -0
- flashseg-1.0.0/flashseg/cfg/__init__.py +3 -0
- flashseg-1.0.0/flashseg/cfg/config.py +113 -0
- flashseg-1.0.0/flashseg/cli.py +216 -0
- flashseg-1.0.0/flashseg/data/__init__.py +4 -0
- flashseg-1.0.0/flashseg/data/dataset.py +93 -0
- flashseg-1.0.0/flashseg/data/transforms.py +34 -0
- flashseg-1.0.0/flashseg/engine/__init__.py +6 -0
- flashseg-1.0.0/flashseg/engine/exporter.py +68 -0
- flashseg-1.0.0/flashseg/engine/predictor.py +115 -0
- flashseg-1.0.0/flashseg/engine/trainer.py +174 -0
- flashseg-1.0.0/flashseg/engine/validator.py +65 -0
- flashseg-1.0.0/flashseg/losses/__init__.py +3 -0
- flashseg-1.0.0/flashseg/losses/seg_losses.py +75 -0
- flashseg-1.0.0/flashseg/models/__init__.py +3 -0
- flashseg-1.0.0/flashseg/models/backbone/__init__.py +3 -0
- flashseg-1.0.0/flashseg/models/backbone/shufflenetv2.py +111 -0
- flashseg-1.0.0/flashseg/models/build.py +45 -0
- flashseg-1.0.0/flashseg/models/head/__init__.py +3 -0
- flashseg-1.0.0/flashseg/models/head/seg_head.py +48 -0
- flashseg-1.0.0/flashseg/models/neck/__init__.py +3 -0
- flashseg-1.0.0/flashseg/models/neck/fpn.py +45 -0
- flashseg-1.0.0/flashseg/nn/__init__.py +5 -0
- flashseg-1.0.0/flashseg/nn/blocks.py +97 -0
- flashseg-1.0.0/flashseg/solutions/__init__.py +8 -0
- flashseg-1.0.0/flashseg/solutions/area_calculator.py +34 -0
- flashseg-1.0.0/flashseg/solutions/background_remover.py +47 -0
- flashseg-1.0.0/flashseg/solutions/lane_detector.py +45 -0
- flashseg-1.0.0/flashseg/solutions/scene_parser.py +34 -0
- flashseg-1.0.0/flashseg/utils/__init__.py +4 -0
- flashseg-1.0.0/flashseg/utils/metrics.py +53 -0
- flashseg-1.0.0/flashseg/utils/visualization.py +33 -0
- flashseg-1.0.0/flashseg.egg-info/PKG-INFO +307 -0
- flashseg-1.0.0/flashseg.egg-info/SOURCES.txt +46 -0
- flashseg-1.0.0/flashseg.egg-info/dependency_links.txt +1 -0
- flashseg-1.0.0/flashseg.egg-info/entry_points.txt +2 -0
- flashseg-1.0.0/flashseg.egg-info/requires.txt +28 -0
- flashseg-1.0.0/flashseg.egg-info/top_level.txt +1 -0
- flashseg-1.0.0/pyproject.toml +66 -0
- flashseg-1.0.0/setup.cfg +4 -0
- flashseg-1.0.0/tests/test_losses.py +37 -0
- flashseg-1.0.0/tests/test_models.py +34 -0
flashseg-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 FlashVision
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
flashseg-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flashseg
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Ultra-lightweight real-time image segmentation with LoRA fine-tuning & Knowledge Distillation
|
|
5
|
+
Author-email: FlashVision <flashvision@github.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/FlashVision/FlashSeg
|
|
8
|
+
Project-URL: Repository, https://github.com/FlashVision/FlashSeg
|
|
9
|
+
Project-URL: Documentation, https://github.com/FlashVision/FlashSeg#readme
|
|
10
|
+
Project-URL: Issues, https://github.com/FlashVision/FlashSeg/issues
|
|
11
|
+
Keywords: segmentation,semantic-segmentation,instance-segmentation,deep-learning,edge-ai,real-time,lightweight
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
24
|
+
Requires-Python: >=3.8
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: torch>=2.0
|
|
28
|
+
Requires-Dist: torchvision>=0.15
|
|
29
|
+
Requires-Dist: numpy>=1.21
|
|
30
|
+
Requires-Dist: opencv-python>=4.5
|
|
31
|
+
Requires-Dist: Pillow>=9.0
|
|
32
|
+
Requires-Dist: PyYAML>=6.0
|
|
33
|
+
Requires-Dist: tqdm>=4.60
|
|
34
|
+
Provides-Extra: export
|
|
35
|
+
Requires-Dist: onnx>=1.12; extra == "export"
|
|
36
|
+
Requires-Dist: onnxruntime>=1.13; extra == "export"
|
|
37
|
+
Requires-Dist: onnxsim>=0.4; extra == "export"
|
|
38
|
+
Provides-Extra: analytics
|
|
39
|
+
Requires-Dist: matplotlib>=3.5; extra == "analytics"
|
|
40
|
+
Requires-Dist: seaborn>=0.12; extra == "analytics"
|
|
41
|
+
Requires-Dist: pandas>=1.4; extra == "analytics"
|
|
42
|
+
Provides-Extra: solutions
|
|
43
|
+
Requires-Dist: shapely>=2.0; extra == "solutions"
|
|
44
|
+
Provides-Extra: all
|
|
45
|
+
Requires-Dist: flashseg[analytics,export,solutions]; extra == "all"
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
49
|
+
Requires-Dist: pre-commit>=3.0; extra == "dev"
|
|
50
|
+
Dynamic: license-file
|
|
51
|
+
|
|
52
|
+
<p align="center">
|
|
53
|
+
<img src="assets/logo.png" width="200" alt="FlashSeg Logo">
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
<h1 align="center">FlashSeg</h1>
|
|
57
|
+
|
|
58
|
+
<p align="center">
|
|
59
|
+
<a href="https://github.com/FlashVision/FlashSeg/actions"><img src="https://img.shields.io/github/actions/workflow/status/FlashVision/FlashSeg/ci.yml?logo=github" alt="CI"></a>
|
|
60
|
+
<img src="https://img.shields.io/badge/PyTorch-2.0+-ee4c2c?logo=pytorch&logoColor=white" alt="PyTorch">
|
|
61
|
+
<img src="https://img.shields.io/badge/Python-3.8+-3776ab?logo=python&logoColor=white" alt="Python">
|
|
62
|
+
<img src="https://img.shields.io/badge/ONNX-Export-005CED?logo=onnx&logoColor=white" alt="ONNX">
|
|
63
|
+
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License">
|
|
64
|
+
</p>
|
|
65
|
+
|
|
66
|
+
<p align="center">
|
|
67
|
+
<b>Ultra-lightweight real-time semantic segmentation with LoRA fine-tuning & knowledge distillation</b>
|
|
68
|
+
</p>
|
|
69
|
+
|
|
70
|
+
<p align="center">
|
|
71
|
+
<a href="#installation">Install</a> •
|
|
72
|
+
<a href="#usage">Usage</a> •
|
|
73
|
+
<a href="#models">Models</a> •
|
|
74
|
+
<a href="#solutions">Solutions</a> •
|
|
75
|
+
<a href="#training">Training</a> •
|
|
76
|
+
<a href="#examples">Examples</a>
|
|
77
|
+
</p>
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## What is FlashSeg?
|
|
82
|
+
|
|
83
|
+
FlashSeg is an ultra-lightweight semantic segmentation framework built for **speed and edge deployment**. Using a ShuffleNetV2 backbone with an FPN neck, it delivers real-time pixel-level predictions with models as small as 0.3M parameters.
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install -e .
|
|
87
|
+
flashseg train --model-size m --num-classes 21 --train-images data/images --train-masks data/masks --val-images data/val_img --val-masks data/val_mask
|
|
88
|
+
flashseg predict --model best.pth --source images/
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Installation
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# From source
|
|
97
|
+
git clone https://github.com/FlashVision/FlashSeg.git
|
|
98
|
+
cd FlashSeg
|
|
99
|
+
pip install -e ".[all]"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Optional extras
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
pip install -e ".[export]" # ONNX export
|
|
106
|
+
pip install -e ".[analytics]" # Benchmarking, plots
|
|
107
|
+
pip install -e ".[solutions]" # Background removal, lane detection
|
|
108
|
+
pip install -e ".[all]" # Everything
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Verify
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
flashseg check
|
|
115
|
+
flashseg settings
|
|
116
|
+
flashseg version
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Usage
|
|
122
|
+
|
|
123
|
+
### Python API
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from flashseg import Trainer, Predictor, Exporter
|
|
127
|
+
|
|
128
|
+
# Train
|
|
129
|
+
trainer = Trainer(
|
|
130
|
+
model_size="m",
|
|
131
|
+
train_images="data/images",
|
|
132
|
+
train_masks="data/masks",
|
|
133
|
+
val_images="data/val_images",
|
|
134
|
+
val_masks="data/val_masks",
|
|
135
|
+
num_classes=21,
|
|
136
|
+
epochs=100,
|
|
137
|
+
device="cuda",
|
|
138
|
+
)
|
|
139
|
+
trainer.train()
|
|
140
|
+
|
|
141
|
+
# Predict
|
|
142
|
+
predictor = Predictor(model_path="workspace/best.pth", num_classes=21, device="cuda")
|
|
143
|
+
mask = predictor.predict("photo.jpg")
|
|
144
|
+
|
|
145
|
+
# Export
|
|
146
|
+
exporter = Exporter(model_path="workspace/best.pth", num_classes=21)
|
|
147
|
+
exporter.export(output="model.onnx", simplify=True)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### CLI
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
flashseg train --model-size m --epochs 100 --device cuda \
|
|
154
|
+
--train-images data/images --train-masks data/masks \
|
|
155
|
+
--val-images data/val_img --val-masks data/val_mask --num-classes 21
|
|
156
|
+
|
|
157
|
+
flashseg predict --model best.pth --source images/ --save-dir output/
|
|
158
|
+
|
|
159
|
+
flashseg val --model best.pth --val-images data/val --val-masks data/val_mask
|
|
160
|
+
|
|
161
|
+
flashseg export --model best.pth --output model.onnx --simplify
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Models
|
|
167
|
+
|
|
168
|
+
| Model | Params | FP16 Size | Input | mIoU (VOC) |
|
|
169
|
+
|-------|--------|-----------|-------|-------------|
|
|
170
|
+
| **FlashSeg-n** | 0.3M | ~0.7 MB | 256 | — |
|
|
171
|
+
| **FlashSeg-s** | 0.8M | ~1.6 MB | 256 | — |
|
|
172
|
+
| **FlashSeg-m** | 1.5M | ~3.0 MB | 512 | — |
|
|
173
|
+
| **FlashSeg-l** | 3.2M | ~6.4 MB | 512 | — |
|
|
174
|
+
|
|
175
|
+
### Config-driven Training
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
flashseg train --config configs/flashseg_m_512_voc.yaml
|
|
179
|
+
flashseg train --config configs/flashseg_s_256_cityscapes.yaml
|
|
180
|
+
flashseg train --config configs/flashseg_m_512_lora.yaml
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Solutions
|
|
186
|
+
|
|
187
|
+
Built-in high-level applications:
|
|
188
|
+
|
|
189
|
+
```python
|
|
190
|
+
from flashseg import Predictor
|
|
191
|
+
from flashseg.solutions import BackgroundRemover, LaneDetector, SceneParser, AreaCalculator
|
|
192
|
+
|
|
193
|
+
predictor = Predictor(model_path="best.pth", num_classes=21)
|
|
194
|
+
|
|
195
|
+
# Remove background
|
|
196
|
+
remover = BackgroundRemover(predictor, foreground_classes=[15])
|
|
197
|
+
result = remover.remove(image)
|
|
198
|
+
|
|
199
|
+
# Detect lanes
|
|
200
|
+
lanes = LaneDetector(predictor, lane_class_id=1)
|
|
201
|
+
|
|
202
|
+
# Parse scenes
|
|
203
|
+
parser = SceneParser(predictor, class_names=["bg", "road", "building", ...])
|
|
204
|
+
stats = parser.parse(image) # {"road": 35.2, "building": 12.1, ...}
|
|
205
|
+
|
|
206
|
+
# Calculate areas
|
|
207
|
+
calc = AreaCalculator(predictor, pixels_per_meter=10.0)
|
|
208
|
+
areas = calc.calculate(image) # {class_id: area_m2, ...}
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
| Solution | Description |
|
|
212
|
+
|----------|-------------|
|
|
213
|
+
| **BackgroundRemover** | Remove/replace backgrounds, generate alpha mattes |
|
|
214
|
+
| **LaneDetector** | Detect road lanes from segmentation |
|
|
215
|
+
| **SceneParser** | Break scene into labeled regions with area stats |
|
|
216
|
+
| **AreaCalculator** | Measure real-world areas from masks |
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Training
|
|
221
|
+
|
|
222
|
+
### Standard
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
flashseg train --model-size m --epochs 100 --num-classes 21 --device cuda
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### LoRA Fine-Tuning
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
flashseg train --model-size m --lora --config configs/flashseg_m_512_lora.yaml
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### Mixed Precision
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
flashseg train --model-size m --amp --device cuda
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Examples
|
|
243
|
+
|
|
244
|
+
| Script | What it does |
|
|
245
|
+
|--------|--------------|
|
|
246
|
+
| `train_voc.py` | Train on Pascal VOC |
|
|
247
|
+
| `predict_image.py` | Segment a single image |
|
|
248
|
+
| `background_removal.py` | Remove image background |
|
|
249
|
+
| `export_onnx.py` | Export to ONNX |
|
|
250
|
+
| `benchmark_model.py` | Measure FPS and latency |
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## Project Structure
|
|
255
|
+
|
|
256
|
+
```
|
|
257
|
+
FlashSeg/
|
|
258
|
+
├── flashseg/ # Main package
|
|
259
|
+
│ ├── cfg/ # Configuration + YAML loading
|
|
260
|
+
│ ├── data/ # Datasets, transforms
|
|
261
|
+
│ ├── engine/ # Trainer, Predictor, Exporter, Validator
|
|
262
|
+
│ ├── models/ # ShuffleNetV2, FPN, SegHead
|
|
263
|
+
│ ├── losses/ # CE, Dice, Focal, Combined
|
|
264
|
+
│ ├── nn/ # ConvBnRelu, ASPP, DepthwiseSeparable
|
|
265
|
+
│ ├── utils/ # Metrics, visualization
|
|
266
|
+
│ ├── solutions/ # Background removal, lanes, scene parsing
|
|
267
|
+
│ └── analytics/ # Benchmark, profiler
|
|
268
|
+
├── configs/ # YAML configs (pick & train)
|
|
269
|
+
├── examples/ # Ready-to-run scripts
|
|
270
|
+
├── tests/ # Unit tests
|
|
271
|
+
├── docker/ # Dockerfile + compose
|
|
272
|
+
├── pyproject.toml # Package config
|
|
273
|
+
└── LICENSE # MIT
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
---
|
|
277
|
+
|
|
278
|
+
## Docker
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
docker build -t flashseg -f docker/Dockerfile .
|
|
282
|
+
docker run --gpus all -v $(pwd)/data:/app/data flashseg predict --model best.pth --source data/
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
---
|
|
286
|
+
|
|
287
|
+
## Contributing
|
|
288
|
+
|
|
289
|
+
```bash
|
|
290
|
+
git clone https://github.com/FlashVision/FlashSeg.git
|
|
291
|
+
cd FlashSeg
|
|
292
|
+
pip install -e ".[dev,all]"
|
|
293
|
+
ruff check flashseg/
|
|
294
|
+
pytest tests/ -v
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## License
|
|
300
|
+
|
|
301
|
+
MIT — see [LICENSE](LICENSE).
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
<p align="center">
|
|
306
|
+
<a href="https://github.com/FlashVision"><b>FlashVision</b></a> — Open-source lightweight AI
|
|
307
|
+
</p>
|
flashseg-1.0.0/README.md
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="assets/logo.png" width="200" alt="FlashSeg Logo">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">FlashSeg</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="https://github.com/FlashVision/FlashSeg/actions"><img src="https://img.shields.io/github/actions/workflow/status/FlashVision/FlashSeg/ci.yml?logo=github" alt="CI"></a>
|
|
9
|
+
<img src="https://img.shields.io/badge/PyTorch-2.0+-ee4c2c?logo=pytorch&logoColor=white" alt="PyTorch">
|
|
10
|
+
<img src="https://img.shields.io/badge/Python-3.8+-3776ab?logo=python&logoColor=white" alt="Python">
|
|
11
|
+
<img src="https://img.shields.io/badge/ONNX-Export-005CED?logo=onnx&logoColor=white" alt="ONNX">
|
|
12
|
+
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="License">
|
|
13
|
+
</p>
|
|
14
|
+
|
|
15
|
+
<p align="center">
|
|
16
|
+
<b>Ultra-lightweight real-time semantic segmentation with LoRA fine-tuning & knowledge distillation</b>
|
|
17
|
+
</p>
|
|
18
|
+
|
|
19
|
+
<p align="center">
|
|
20
|
+
<a href="#installation">Install</a> •
|
|
21
|
+
<a href="#usage">Usage</a> •
|
|
22
|
+
<a href="#models">Models</a> •
|
|
23
|
+
<a href="#solutions">Solutions</a> •
|
|
24
|
+
<a href="#training">Training</a> •
|
|
25
|
+
<a href="#examples">Examples</a>
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## What is FlashSeg?
|
|
31
|
+
|
|
32
|
+
FlashSeg is an ultra-lightweight semantic segmentation framework built for **speed and edge deployment**. Using a ShuffleNetV2 backbone with an FPN neck, it delivers real-time pixel-level predictions with models as small as 0.3M parameters.
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install -e .
|
|
36
|
+
flashseg train --model-size m --num-classes 21 --train-images data/images --train-masks data/masks --val-images data/val_img --val-masks data/val_mask
|
|
37
|
+
flashseg predict --model best.pth --source images/
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# From source
|
|
46
|
+
git clone https://github.com/FlashVision/FlashSeg.git
|
|
47
|
+
cd FlashSeg
|
|
48
|
+
pip install -e ".[all]"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Optional extras
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install -e ".[export]" # ONNX export
|
|
55
|
+
pip install -e ".[analytics]" # Benchmarking, plots
|
|
56
|
+
pip install -e ".[solutions]" # Background removal, lane detection
|
|
57
|
+
pip install -e ".[all]" # Everything
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Verify
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
flashseg check
|
|
64
|
+
flashseg settings
|
|
65
|
+
flashseg version
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
### Python API
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from flashseg import Trainer, Predictor, Exporter
|
|
76
|
+
|
|
77
|
+
# Train
|
|
78
|
+
trainer = Trainer(
|
|
79
|
+
model_size="m",
|
|
80
|
+
train_images="data/images",
|
|
81
|
+
train_masks="data/masks",
|
|
82
|
+
val_images="data/val_images",
|
|
83
|
+
val_masks="data/val_masks",
|
|
84
|
+
num_classes=21,
|
|
85
|
+
epochs=100,
|
|
86
|
+
device="cuda",
|
|
87
|
+
)
|
|
88
|
+
trainer.train()
|
|
89
|
+
|
|
90
|
+
# Predict
|
|
91
|
+
predictor = Predictor(model_path="workspace/best.pth", num_classes=21, device="cuda")
|
|
92
|
+
mask = predictor.predict("photo.jpg")
|
|
93
|
+
|
|
94
|
+
# Export
|
|
95
|
+
exporter = Exporter(model_path="workspace/best.pth", num_classes=21)
|
|
96
|
+
exporter.export(output="model.onnx", simplify=True)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### CLI
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
flashseg train --model-size m --epochs 100 --device cuda \
|
|
103
|
+
--train-images data/images --train-masks data/masks \
|
|
104
|
+
--val-images data/val_img --val-masks data/val_mask --num-classes 21
|
|
105
|
+
|
|
106
|
+
flashseg predict --model best.pth --source images/ --save-dir output/
|
|
107
|
+
|
|
108
|
+
flashseg val --model best.pth --val-images data/val --val-masks data/val_mask
|
|
109
|
+
|
|
110
|
+
flashseg export --model best.pth --output model.onnx --simplify
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Models
|
|
116
|
+
|
|
117
|
+
| Model | Params | FP16 Size | Input | mIoU (VOC) |
|
|
118
|
+
|-------|--------|-----------|-------|-------------|
|
|
119
|
+
| **FlashSeg-n** | 0.3M | ~0.7 MB | 256 | — |
|
|
120
|
+
| **FlashSeg-s** | 0.8M | ~1.6 MB | 256 | — |
|
|
121
|
+
| **FlashSeg-m** | 1.5M | ~3.0 MB | 512 | — |
|
|
122
|
+
| **FlashSeg-l** | 3.2M | ~6.4 MB | 512 | — |
|
|
123
|
+
|
|
124
|
+
### Config-driven Training
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
flashseg train --config configs/flashseg_m_512_voc.yaml
|
|
128
|
+
flashseg train --config configs/flashseg_s_256_cityscapes.yaml
|
|
129
|
+
flashseg train --config configs/flashseg_m_512_lora.yaml
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Solutions
|
|
135
|
+
|
|
136
|
+
Built-in high-level applications:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
from flashseg import Predictor
|
|
140
|
+
from flashseg.solutions import BackgroundRemover, LaneDetector, SceneParser, AreaCalculator
|
|
141
|
+
|
|
142
|
+
predictor = Predictor(model_path="best.pth", num_classes=21)
|
|
143
|
+
|
|
144
|
+
# Remove background
|
|
145
|
+
remover = BackgroundRemover(predictor, foreground_classes=[15])
|
|
146
|
+
result = remover.remove(image)
|
|
147
|
+
|
|
148
|
+
# Detect lanes
|
|
149
|
+
lanes = LaneDetector(predictor, lane_class_id=1)
|
|
150
|
+
|
|
151
|
+
# Parse scenes
|
|
152
|
+
parser = SceneParser(predictor, class_names=["bg", "road", "building", ...])
|
|
153
|
+
stats = parser.parse(image) # {"road": 35.2, "building": 12.1, ...}
|
|
154
|
+
|
|
155
|
+
# Calculate areas
|
|
156
|
+
calc = AreaCalculator(predictor, pixels_per_meter=10.0)
|
|
157
|
+
areas = calc.calculate(image) # {class_id: area_m2, ...}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
| Solution | Description |
|
|
161
|
+
|----------|-------------|
|
|
162
|
+
| **BackgroundRemover** | Remove/replace backgrounds, generate alpha mattes |
|
|
163
|
+
| **LaneDetector** | Detect road lanes from segmentation |
|
|
164
|
+
| **SceneParser** | Break scene into labeled regions with area stats |
|
|
165
|
+
| **AreaCalculator** | Measure real-world areas from masks |
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Training
|
|
170
|
+
|
|
171
|
+
### Standard
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
flashseg train --model-size m --epochs 100 --num-classes 21 --device cuda
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### LoRA Fine-Tuning
|
|
178
|
+
|
|
179
|
+
```bash
|
|
180
|
+
flashseg train --model-size m --lora --config configs/flashseg_m_512_lora.yaml
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Mixed Precision
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
flashseg train --model-size m --amp --device cuda
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Examples
|
|
192
|
+
|
|
193
|
+
| Script | What it does |
|
|
194
|
+
|--------|--------------|
|
|
195
|
+
| `train_voc.py` | Train on Pascal VOC |
|
|
196
|
+
| `predict_image.py` | Segment a single image |
|
|
197
|
+
| `background_removal.py` | Remove image background |
|
|
198
|
+
| `export_onnx.py` | Export to ONNX |
|
|
199
|
+
| `benchmark_model.py` | Measure FPS and latency |
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## Project Structure
|
|
204
|
+
|
|
205
|
+
```
|
|
206
|
+
FlashSeg/
|
|
207
|
+
├── flashseg/ # Main package
|
|
208
|
+
│ ├── cfg/ # Configuration + YAML loading
|
|
209
|
+
│ ├── data/ # Datasets, transforms
|
|
210
|
+
│ ├── engine/ # Trainer, Predictor, Exporter, Validator
|
|
211
|
+
│ ├── models/ # ShuffleNetV2, FPN, SegHead
|
|
212
|
+
│ ├── losses/ # CE, Dice, Focal, Combined
|
|
213
|
+
│ ├── nn/ # ConvBnRelu, ASPP, DepthwiseSeparable
|
|
214
|
+
│ ├── utils/ # Metrics, visualization
|
|
215
|
+
│ ├── solutions/ # Background removal, lanes, scene parsing
|
|
216
|
+
│ └── analytics/ # Benchmark, profiler
|
|
217
|
+
├── configs/ # YAML configs (pick & train)
|
|
218
|
+
├── examples/ # Ready-to-run scripts
|
|
219
|
+
├── tests/ # Unit tests
|
|
220
|
+
├── docker/ # Dockerfile + compose
|
|
221
|
+
├── pyproject.toml # Package config
|
|
222
|
+
└── LICENSE # MIT
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Docker
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
docker build -t flashseg -f docker/Dockerfile .
|
|
231
|
+
docker run --gpus all -v $(pwd)/data:/app/data flashseg predict --model best.pth --source data/
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## Contributing
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
git clone https://github.com/FlashVision/FlashSeg.git
|
|
240
|
+
cd FlashSeg
|
|
241
|
+
pip install -e ".[dev,all]"
|
|
242
|
+
ruff check flashseg/
|
|
243
|
+
pytest tests/ -v
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
MIT — see [LICENSE](LICENSE).
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
<p align="center">
|
|
255
|
+
<a href="https://github.com/FlashVision"><b>FlashVision</b></a> — Open-source lightweight AI
|
|
256
|
+
</p>
|
flashseg-1.0.0/flashseg/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""FlashSeg - Ultra-lightweight real-time image segmentation."""
|
|
2
|
+
|
|
3
|
+
__version__ = "1.0.0"
|
|
4
|
+
|
|
5
|
+
from flashseg.cfg.config import get_config
|
|
6
|
+
from flashseg.engine.trainer import Trainer
|
|
7
|
+
from flashseg.engine.predictor import Predictor
|
|
8
|
+
from flashseg.engine.exporter import Exporter
|
|
9
|
+
from flashseg.engine.validator import Validator
|
|
10
|
+
from flashseg.models.build import build_model
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"__version__",
|
|
14
|
+
"get_config",
|
|
15
|
+
"build_model",
|
|
16
|
+
"Trainer",
|
|
17
|
+
"Predictor",
|
|
18
|
+
"Exporter",
|
|
19
|
+
"Validator",
|
|
20
|
+
]
|
flashseg-1.0.0/flashseg/analytics/benchmark.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Model benchmarking for segmentation."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
import torch
|
|
7
|
+
|
|
8
|
+
from flashseg.cfg.config import get_config
|
|
9
|
+
from flashseg.models.build import build_model
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)


class Benchmark:
    """Benchmark FlashSeg model speed and efficiency.

    The constructor builds a model from ``get_config`` / ``build_model``,
    optionally loads a state dict from ``model_path``, and puts the model in
    eval mode. :meth:`run` then times forward passes on a random input.
    """

    def __init__(
        self,
        model_path: str = None,
        model_size: str = "m",
        input_size: int = 512,
        num_classes: int = 21,
        device: str = "cuda",
    ):
        """Build the model and move it to the benchmark device.

        Args:
            model_path: Optional path to a state dict to load; skipped when
                falsy (``None`` or empty string).
            model_size: Forwarded to ``get_config`` as ``model_size``.
            input_size: Side length of the square dummy input; also forwarded
                to ``get_config``.
            num_classes: Forwarded to ``get_config`` as ``num_classes``.
            device: Requested device string. When CUDA is unavailable, any
                value other than ``"cpu"`` falls back to ``"cpu"``.
        """
        # Use the requested device as-is when CUDA is available or CPU was
        # asked for explicitly; otherwise fall back to CPU.
        self.device = torch.device(device if torch.cuda.is_available() or device == "cpu" else "cpu")
        config = get_config(model_size=model_size, input_size=input_size, num_classes=num_classes)
        self.model = build_model(config).to(self.device)

        if model_path:
            # NOTE(review): torch.load unpickles the file, so only load
            # checkpoints from trusted sources. Consider passing
            # weights_only=True once all supported checkpoints are plain
            # state dicts.
            self.model.load_state_dict(torch.load(model_path, map_location=self.device))

        self.model.eval()
        self.input_size = input_size

    def run(self, warmup: int = 10, iterations: int = 100) -> dict:
        """Run the benchmark and return timing results.

        Args:
            warmup: Number of untimed forward passes executed first.
            iterations: Number of timed forward passes; must be at least 1.

        Returns:
            A dict with keys ``latency_ms`` (mean wall time per forward pass,
            rounded to 2 decimals), ``fps`` (``1000 / latency``, rounded to
            1 decimal), ``params`` (total parameter count), ``params_m``
            (parameters in millions), ``size_mb`` (parameter count times
            4 bytes, in MiB), ``device`` and ``input_size``.

        Raises:
            ValueError: If ``iterations`` is less than 1.
        """
        # Without at least one timed pass the mean below would divide by zero.
        if iterations < 1:
            raise ValueError(f"iterations must be >= 1, got {iterations}")

        dummy = torch.randn(1, 3, self.input_size, self.input_size).to(self.device)
        on_cuda = self.device.type == "cuda"

        # Warmup
        with torch.no_grad():
            for _ in range(warmup):
                self.model(dummy)

        # Drain queued CUDA work so warmup kernels are not billed to the
        # first timed iteration.
        if on_cuda:
            torch.cuda.synchronize()

        # Benchmark
        times = []
        with torch.no_grad():
            for _ in range(iterations):
                start = time.perf_counter()
                self.model(dummy)
                # CUDA launches are asynchronous; wait for completion before
                # reading the clock.
                if on_cuda:
                    torch.cuda.synchronize()
                times.append(time.perf_counter() - start)

        avg_ms = sum(times) / len(times) * 1000
        # Guard against a zero average on clocks too coarse to resolve a pass.
        fps = 1000.0 / avg_ms if avg_ms > 0 else float("inf")
        params = sum(p.numel() for p in self.model.parameters())

        results = {
            "latency_ms": round(avg_ms, 2),
            "fps": round(fps, 1),
            "params": params,
            "params_m": round(params / 1e6, 2),
            # Size estimate uses 4 bytes per parameter.
            "size_mb": round(params * 4 / 1024 / 1024, 2),
            "device": str(self.device),
            "input_size": self.input_size,
        }

        logger.info(f"Benchmark: {fps:.1f} FPS, {avg_ms:.2f}ms, {params / 1e6:.2f}M params")
        return results
|