online-face-detection 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- online_face_detection-0.1.0/.gitignore +22 -0
- online_face_detection-0.1.0/.python-version +1 -0
- online_face_detection-0.1.0/LICENSE +21 -0
- online_face_detection-0.1.0/PKG-INFO +330 -0
- online_face_detection-0.1.0/README.md +262 -0
- online_face_detection-0.1.0/examples/demo.py +37 -0
- online_face_detection-0.1.0/online_face/__init__.py +35 -0
- online_face_detection-0.1.0/online_face/__main__.py +29 -0
- online_face_detection-0.1.0/online_face/_wire.py +72 -0
- online_face_detection-0.1.0/online_face/cli/__init__.py +1 -0
- online_face_detection-0.1.0/online_face/cli/export.py +32 -0
- online_face_detection-0.1.0/online_face/cli/run.py +64 -0
- online_face_detection-0.1.0/online_face/client.py +70 -0
- online_face_detection-0.1.0/online_face/detector.py +152 -0
- online_face_detection-0.1.0/online_face/families/__init__.py +45 -0
- online_face_detection-0.1.0/online_face/families/base.py +82 -0
- online_face_detection-0.1.0/online_face/families/retinaface.py +148 -0
- online_face_detection-0.1.0/online_face/models/retinaface/__init__.py +7 -0
- online_face_detection-0.1.0/online_face/models/retinaface/box.py +72 -0
- online_face_detection-0.1.0/online_face/models/retinaface/net.py +211 -0
- online_face_detection-0.1.0/online_face/py.typed +0 -0
- online_face_detection-0.1.0/online_face/registry.py +171 -0
- online_face_detection-0.1.0/online_face/runtime/__init__.py +33 -0
- online_face_detection-0.1.0/online_face/runtime/backends/__init__.py +92 -0
- online_face_detection-0.1.0/online_face/runtime/backends/base.py +39 -0
- online_face_detection-0.1.0/online_face/runtime/backends/onnx_backend.py +67 -0
- online_face_detection-0.1.0/online_face/runtime/backends/torch_backend.py +30 -0
- online_face_detection-0.1.0/online_face/runtime/backends/torchscript_backend.py +29 -0
- online_face_detection-0.1.0/online_face/runtime/backends/trt_backend.py +53 -0
- online_face_detection-0.1.0/online_face/runtime/cache.py +155 -0
- online_face_detection-0.1.0/online_face/runtime/config.py +36 -0
- online_face_detection-0.1.0/online_face/runtime/device.py +142 -0
- online_face_detection-0.1.0/online_face/runtime/errors.py +42 -0
- online_face_detection-0.1.0/online_face/runtime/export.py +100 -0
- online_face_detection-0.1.0/online_face/runtime/logging.py +48 -0
- online_face_detection-0.1.0/online_face/runtime/sources.py +247 -0
- online_face_detection-0.1.0/online_face/runtime/streaming.py +172 -0
- online_face_detection-0.1.0/online_face/runtime/tensor.py +145 -0
- online_face_detection-0.1.0/online_face/runtime/timing.py +96 -0
- online_face_detection-0.1.0/online_face/runtime/viz/__init__.py +6 -0
- online_face_detection-0.1.0/online_face/runtime/viz/draw.py +60 -0
- online_face_detection-0.1.0/online_face/serve.py +113 -0
- online_face_detection-0.1.0/pyproject.toml +64 -0
- online_face_detection-0.1.0/tests/test_smoke.py +65 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
build/
|
|
5
|
+
dist/
|
|
6
|
+
wheels/
|
|
7
|
+
*.egg-info/
|
|
8
|
+
|
|
9
|
+
# Virtual environments
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
|
|
13
|
+
# Tooling / OS
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.DS_Store
|
|
18
|
+
|
|
19
|
+
# Local artifacts / outputs
|
|
20
|
+
out/
|
|
21
|
+
*.mp4
|
|
22
|
+
!**/test/**/*.mp4
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.11
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Surya Chand Rayala
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: online-face-detection
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Streaming, frame-by-frame face detection (RetinaFace) with a unified torch/torchscript/onnx/trt runtime and export-once caching for edge devices.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Surya-Rayala/online-face-detection
|
|
6
|
+
Project-URL: Repository, https://github.com/Surya-Rayala/online-face-detection
|
|
7
|
+
Project-URL: Issues, https://github.com/Surya-Rayala/online-face-detection/issues
|
|
8
|
+
Author-email: Surya Chand Rayala <suryachand2k1@gmail.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 Surya Chand Rayala
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: edge,face-detection,jetson,onnx,retinaface,streaming,tensorrt
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: Intended Audience :: Science/Research
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Operating System :: OS Independent
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
42
|
+
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
43
|
+
Requires-Python: <3.13,>=3.10
|
|
44
|
+
Requires-Dist: numpy>=1.23
|
|
45
|
+
Requires-Dist: opencv-python>=4.7
|
|
46
|
+
Requires-Dist: tqdm>=4.65
|
|
47
|
+
Provides-Extra: client
|
|
48
|
+
Requires-Dist: requests>=2.31; extra == 'client'
|
|
49
|
+
Provides-Extra: dev
|
|
50
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
51
|
+
Requires-Dist: pytest>=7.4; extra == 'dev'
|
|
52
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
53
|
+
Provides-Extra: onnx
|
|
54
|
+
Requires-Dist: onnx>=1.15; extra == 'onnx'
|
|
55
|
+
Requires-Dist: onnxruntime>=1.16; extra == 'onnx'
|
|
56
|
+
Requires-Dist: onnxsim>=0.4.33; extra == 'onnx'
|
|
57
|
+
Provides-Extra: serve
|
|
58
|
+
Requires-Dist: fastapi>=0.110; extra == 'serve'
|
|
59
|
+
Requires-Dist: python-multipart>=0.0.9; extra == 'serve'
|
|
60
|
+
Requires-Dist: uvicorn[standard]>=0.27; extra == 'serve'
|
|
61
|
+
Provides-Extra: torch
|
|
62
|
+
Requires-Dist: retinaface-pytorch>=0.0.7; extra == 'torch'
|
|
63
|
+
Requires-Dist: torch>=2.1; extra == 'torch'
|
|
64
|
+
Requires-Dist: torchvision>=0.16; extra == 'torch'
|
|
65
|
+
Provides-Extra: trt
|
|
66
|
+
Requires-Dist: tensorrt>=8.6; extra == 'trt'
|
|
67
|
+
Description-Content-Type: text/markdown
|
|
68
|
+
|
|
69
|
+
# online-face-detection
|
|
70
|
+
|
|
71
|
+
Streaming, **frame-by-frame** face detection for real-time pipelines:
|
|
72
|
+
one small object — a frame in, structured results out. Runs under **torch / torchscript /
|
|
73
|
+
onnx / tensorrt** with export-once caching, on CPU, CUDA, Apple Silicon (MPS), and Jetson.
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from online_face import FaceDetector
|
|
77
|
+
det = FaceDetector("retinaface", device="auto")
|
|
78
|
+
res = det(frame) # res.boxes, res.scores, res.landmarks
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
> **Models today:** RetinaFace. More face-detection families plug in via the registry — coming later.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Install
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install "online-face-detection[torch]"
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
That's all you need for most setups — `[torch]` is the default runtime and works on CPU,
|
|
92
|
+
CUDA, and Mac (MPS). Other backends (`onnx`, `tensorrt`, serving) are **optional extras** you
|
|
93
|
+
can add anytime — see [Install options](#install-options). (Prefer `uv`? See [Misc](#misc).)
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## Use it (Python)
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from online_face import FaceDetector
|
|
101
|
+
|
|
102
|
+
det = FaceDetector(
|
|
103
|
+
"retinaface", # model family (the only one today)
|
|
104
|
+
device="auto", # "auto" (CUDA > MPS > CPU) | "cpu" | "cuda" | "mps"
|
|
105
|
+
runtime="auto", # "auto" | "torch" | "torchscript" | "onnx" | "trt"
|
|
106
|
+
conf=0.5, # detection confidence threshold
|
|
107
|
+
nms=0.4, # NMS IoU threshold
|
|
108
|
+
)
|
|
109
|
+
res = det(frame) # weights auto-download on first use; see "Input" below
|
|
110
|
+
|
|
111
|
+
res.boxes # (N, 4) xyxy, in original-frame coordinates
|
|
112
|
+
res.scores # (N,)
|
|
113
|
+
res.landmarks # (N, 5, 2)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Input — what `frame` must be:** a NumPy array in **BGR** order, shape **`(H, W, 3)`**, dtype
|
|
117
|
+
**`uint8`** (OpenCV's native format — e.g. straight from `cv2.imread(...)` or
|
|
118
|
+
`cv2.VideoCapture(...).read()`), **or** a `torch.Tensor` of shape **`(3, H, W)`**. Any
|
|
119
|
+
resolution; the model letterboxes internally.
|
|
120
|
+
|
|
121
|
+
Drive a video file or a live stream (FPS/latency print to the terminal):
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
for frame_ref, res in det.run_source("video.mp4"): # a file
|
|
125
|
+
...
|
|
126
|
+
for frame_ref, res in det.run_source("rtsp://cam/stream", is_stream=True): # live
|
|
127
|
+
...
|
|
128
|
+
for frame_ref, res in det.run_source("video.mp4", is_stream=True): # file as a stream
|
|
129
|
+
...
|
|
130
|
+
# frame_ref.image is the BGR frame; res is the FaceFrameResult for that frame.
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
**Output —** `FaceFrameResult`: `boxes (N,4)` xyxy · `scores (N,)` · `landmarks (N,5,2)` ·
|
|
134
|
+
`frame_index` · `shape (H,W)`. Coordinates are in the **original** frame. `det.stats.as_dict()`
|
|
135
|
+
gives rolling fps / latency / per-stage timings.
|
|
136
|
+
|
|
137
|
+
### Or from the terminal
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# detect on a video file and show a window (boxes + landmarks + FPS; press q/ESC to quit)
|
|
141
|
+
online-face --source video.mp4 --device auto --runtime auto --conf 0.5 --nms 0.4 --display
|
|
142
|
+
|
|
143
|
+
# webcam (index 0) as a live stream
|
|
144
|
+
online-face --source 0 --device auto --runtime auto --stream --display
|
|
145
|
+
|
|
146
|
+
# headless: write an annotated mp4 instead of showing a window
|
|
147
|
+
online-face --source video.mp4 --device auto --runtime auto --save-video out.mp4
|
|
148
|
+
|
|
149
|
+
# discover weights, or see every flag
|
|
150
|
+
online-face --list-weights
|
|
151
|
+
online-face --help
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
`online-face` == `python -m online_face.cli.run`. All flags:
|
|
155
|
+
`--source` (file path | webcam index | rtsp/http url) · `--device {auto,cpu,cuda,mps}` ·
|
|
156
|
+
`--runtime {auto,torch,torchscript,onnx,trt}` · `--conf` · `--nms` · `--stream` · `--display` ·
|
|
157
|
+
`--save-video PATH` · `--max-frames N` · `--list-weights`.
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Models & weights
|
|
162
|
+
|
|
163
|
+
`model` is the **family** (`retinaface` — the only one today); `weights` is the actual weight —
|
|
164
|
+
a known key (auto-downloaded) or a file path. `weights=None` uses the default.
|
|
165
|
+
|
|
166
|
+
| weights key | impl | exportable | notes |
|
|
167
|
+
|-------------|------|------------|-------|
|
|
168
|
+
| `mobilenet0.25` *(default)* | biubug6 | onnx / trt | light, edge-friendly; auto-downloads (~1.7 MB) |
|
|
169
|
+
| `resnet50` | biubug6 | onnx / trt | higher accuracy; auto-downloads (~109 MB, sha256-checked) |
|
|
170
|
+
| `ternaus_resnet50` | ternaus | torch-only | a convenience weight; works out of the box |
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
FaceDetector("retinaface", weights="mobilenet0.25") # default, auto-downloads
|
|
174
|
+
FaceDetector("retinaface", weights="/models/retinaface.onnx", runtime="onnx") # a ready artifact
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
**`resnet50` is auto-downloaded** (~109 MB, sha256-verified) from the official biubug6 mirror on
|
|
178
|
+
first use — nothing to do. If Google Drive ever rate-limits you, download `Resnet50_Final.pth` from
|
|
179
|
+
[biubug6/Pytorch_Retinaface](https://drive.google.com/file/d/14KX6VqF69MdSPk3Tr9PlDYbq7ArpdNUW/view) and pass the path:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
FaceDetector("retinaface", weights="/path/to/Resnet50_Final.pth") # or --weights on the CLI/serve
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
…or drop it at `~/.cache/online_inference/weights/retinaface_resnet50.pth` and use `weights="resnet50"`.
|
|
186
|
+
(Keep `resnet`/`r50` in the filename — the arch is inferred from the name. The same applies to any
|
|
187
|
+
custom weight file.)
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Runtimes & the export cache
|
|
192
|
+
|
|
193
|
+
`runtime="auto"` picks the best backend per device: **Jetson/CUDA → tensorrt** (else onnx-CUDA),
|
|
194
|
+
**macOS → torch (MPS)**, **CPU → onnx/torch**. The first time a non-torch runtime is used, the
|
|
195
|
+
artifact (torchscript / onnx / trt engine) is **built once and cached** under
|
|
196
|
+
`~/.cache/online_inference/` (override with `$ONLINE_INFERENCE_CACHE`); later runs load it.
|
|
197
|
+
TensorRT engines are keyed to the exact GPU/JetPack so they never load on the wrong device.
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Install options
|
|
202
|
+
|
|
203
|
+
`[torch]` is all most people need. Add extras for other backends. **Extras are additive** — if
|
|
204
|
+
you already installed `[torch]`, running `pip install "online-face-detection[serve]"` later just
|
|
205
|
+
adds those packages (it won't reinstall torch). You can also install several at once:
|
|
206
|
+
`pip install "online-face-detection[torch,onnx,serve]"`.
|
|
207
|
+
|
|
208
|
+
| Extra | Adds | Install when you want to… |
|
|
209
|
+
|-------|------|---------------------------|
|
|
210
|
+
| `[torch]` | torch, torchvision, retinaface-pytorch | **default** runtime (CPU / CUDA / MPS) |
|
|
211
|
+
| `[onnx]` | onnxruntime, onnx, onnxsim | run or export the ONNX backend |
|
|
212
|
+
| `[trt]` | tensorrt | build/run TensorRT engines (NVIDIA) |
|
|
213
|
+
| `[serve]` | fastapi, uvicorn, python-multipart | host the model as an HTTP service (below) |
|
|
214
|
+
| `[client]` | requests | call a remote service (torch-free, below) |
|
|
215
|
+
|
|
216
|
+
**Which do I actually need?**
|
|
217
|
+
- `pip install online-face-detection` (no `[...]`) → **core only** (numpy/opencv/tqdm); **no runtime, can't run inference**. Use this only when torch is provided another way (e.g. Jetson/JetPack wheels).
|
|
218
|
+
- `[torch]` → the **foundation**; required to run the model locally (CPU/CUDA/MPS). Start here.
|
|
219
|
+
- `[onnx]` / `[trt]` → **add** a backend *on top of* torch (they don't replace it). Install together: `pip install "online-face-detection[torch,onnx]"`.
|
|
220
|
+
- `[serve]` → runs the model in-process, so it needs torch too: `pip install "online-face-detection[torch,serve]"`.
|
|
221
|
+
- `[client]` → the **only torch-free** one — it just calls a remote service, so `pip install "online-face-detection[client]"` **alone is enough**.
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## (Optional) Serve it as an HTTP service
|
|
226
|
+
|
|
227
|
+
Besides the in-process use above, the model can run as its own HTTP service (local or cloud)
|
|
228
|
+
and be called by URL. Needs the `[serve]` extra (adds only fastapi/uvicorn on top of `[torch]`).
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
pip install "online-face-detection[torch,serve]"
|
|
232
|
+
online-face-serve --model retinaface --device auto --runtime auto --host 127.0.0.1 --port 8001
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
**Server flags** (all optional; defaults shown): `--model retinaface` ·
|
|
236
|
+
`--weights KEY|PATH` (default: family default) · `--device {auto,cpu,cuda,mps}` ·
|
|
237
|
+
`--runtime {auto,torch,torchscript,onnx,trt}` · `--precision {auto,fp32,fp16,int8}` ·
|
|
238
|
+
`--conf 0.5` · `--nms 0.4` · `--input-size N` · `--host 127.0.0.1` · `--port 8001`.
|
|
239
|
+
|
|
240
|
+
| Route | What it does |
|
|
241
|
+
|-------|--------------|
|
|
242
|
+
| `GET /meta` | self-describing: named, typed inputs/outputs (input `frame: image`; outputs `boxes/scores/landmarks`) |
|
|
243
|
+
| `GET /healthz` | readiness + resolved runtime/device |
|
|
244
|
+
| `POST /predict` | multipart with a `frame` image part → JSON `{outputs, stats}` |
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
curl http://127.0.0.1:8001/meta
|
|
248
|
+
curl -F 'frame=@frame.png;type=image/png' http://127.0.0.1:8001/predict
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
**Call it from another process** with the torch-free `[client]` proxy (mirrors `det(frame)`):
|
|
252
|
+
|
|
253
|
+
```bash
|
|
254
|
+
pip install "online-face-detection[client]"
|
|
255
|
+
```
|
|
256
|
+
```python
|
|
257
|
+
from online_face.client import FaceClient
|
|
258
|
+
|
|
259
|
+
face = FaceClient(
|
|
260
|
+
"http://127.0.0.1:8001", # the service URL (local or cloud)
|
|
261
|
+
encode="png", # how frames go over the wire: "png" (lossless) | "jpeg" (smaller)
|
|
262
|
+
timeout=30, # request timeout, seconds
|
|
263
|
+
)
|
|
264
|
+
res = face(frame) # same shape as det(frame): res.boxes / res.scores / res.landmarks
|
|
265
|
+
face.meta() # the service's /meta; face.healthz() -> readiness
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
Compose two services into a pipeline (e.g. face → emotion) by URL — see
|
|
269
|
+
**[../testing-pipeline](../testing-pipeline)** for a ready-to-run example.
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Misc
|
|
274
|
+
|
|
275
|
+
### Install with uv
|
|
276
|
+
|
|
277
|
+
Same as pip, with `uv`:
|
|
278
|
+
```bash
|
|
279
|
+
uv add "online-face-detection[torch]" # into a uv project
|
|
280
|
+
uv pip install "online-face-detection[torch]" # into the active venv
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Jetson (JetPack)
|
|
284
|
+
|
|
285
|
+
On Jetson the whole GPU stack (CUDA / cuDNN / TensorRT) is part of **JetPack**, and torch/onnxruntime
|
|
286
|
+
must be NVIDIA's Jetson wheels — the PyPI `[torch]`/`[onnx]` wheels are x86_64 and won't use the GPU.
|
|
287
|
+
|
|
288
|
+
**1. Pick a JetPack version.**
|
|
289
|
+
|
|
290
|
+
| Board | JetPack | Stack |
|
|
291
|
+
|-------|---------|-------|
|
|
292
|
+
| Orin (AGX/NX/Nano) | **6.x** | CUDA 12.6 · TensorRT 10.3 · PyTorch 2.6 wheel |
|
|
293
|
+
| Xavier / older | **5.1.x** | torch ~2.1 |
|
|
294
|
+
|
|
295
|
+
Both meet this package's `torch>=2.1` floor.
|
|
296
|
+
|
|
297
|
+
**2. Install these into the JetPack env first** — from NVIDIA's
|
|
298
|
+
[PyTorch for Jetson](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) guide, or the
|
|
299
|
+
[jetson-ai-lab](https://pypi.jetson-ai-lab.io) wheel index matched to your JetPack (e.g.
|
|
300
|
+
`--index-url https://pypi.jetson-ai-lab.io/jp6/cu126` for JetPack 6.x):
|
|
301
|
+
|
|
302
|
+
- `torch`, `torchvision` — the **Jetson GPU wheels** (not from PyPI)
|
|
303
|
+
- `onnxruntime-gpu` — only if you'll use the ONNX backend
|
|
304
|
+
- `opencv-python`, `numpy` — usually already present in JetPack; install if missing
|
|
305
|
+
- TensorRT — **already installed by JetPack** (nothing to do)
|
|
306
|
+
|
|
307
|
+
**3. Then install this package with NO runtime extra**, so it uses the system ones:
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
pip install online-face-detection # no [torch] / [onnx]
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
It adapts to whatever JetPack provides and keys each cached TensorRT engine to the exact board.
|
|
314
|
+
|
|
315
|
+
> **Conflicting model requirements?** One Jetson has a single system torch/TRT. If two models need
|
|
316
|
+
> incompatible torch/CUDA, run each as its own [HTTP service](#optional-serve-it-as-an-http-service)
|
|
317
|
+
> (e.g. an `nvcr.io/nvidia/l4t-pytorch` container) and compose them by URL with the `[client]` proxy.
|
|
318
|
+
|
|
319
|
+
### Pre-build & cache an artifact
|
|
320
|
+
|
|
321
|
+
Optional — otherwise built on first use. Choose the runtime you'll deploy with for the target device:
|
|
322
|
+
```bash
|
|
323
|
+
online-face-export --model retinaface --weights mobilenet0.25 --runtime trt --device auto
|
|
324
|
+
```
|
|
325
|
+
Flags: `--model` · `--weights KEY|PATH` · `--runtime {torchscript,onnx,trt}` ·
|
|
326
|
+
`--device {auto,cpu,cuda,mps}` · `--precision {auto,fp32,fp16,int8}` · `--input-size N`.
|
|
327
|
+
|
|
328
|
+
## License
|
|
329
|
+
|
|
330
|
+
MIT © Surya Chand Rayala
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# online-face-detection
|
|
2
|
+
|
|
3
|
+
Streaming, **frame-by-frame** face detection for real-time pipelines:
|
|
4
|
+
one small object — a frame in, structured results out. Runs under **torch / torchscript /
|
|
5
|
+
onnx / tensorrt** with export-once caching, on CPU, CUDA, Apple Silicon (MPS), and Jetson.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from online_face import FaceDetector
|
|
9
|
+
det = FaceDetector("retinaface", device="auto")
|
|
10
|
+
res = det(frame) # res.boxes, res.scores, res.landmarks
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
> **Models today:** RetinaFace. More face-detection families plug in via the registry — coming later.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install "online-face-detection[torch]"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
That's all you need for most setups — `[torch]` is the default runtime and works on CPU,
|
|
24
|
+
CUDA, and Mac (MPS). Other backends (`onnx`, `tensorrt`, serving) are **optional extras** you
|
|
25
|
+
can add anytime — see [Install options](#install-options). (Prefer `uv`? See [Misc](#misc).)
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Use it (Python)
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from online_face import FaceDetector
|
|
33
|
+
|
|
34
|
+
det = FaceDetector(
|
|
35
|
+
"retinaface", # model family (the only one today)
|
|
36
|
+
device="auto", # "auto" (CUDA > MPS > CPU) | "cpu" | "cuda" | "mps"
|
|
37
|
+
runtime="auto", # "auto" | "torch" | "torchscript" | "onnx" | "trt"
|
|
38
|
+
conf=0.5, # detection confidence threshold
|
|
39
|
+
nms=0.4, # NMS IoU threshold
|
|
40
|
+
)
|
|
41
|
+
res = det(frame) # weights auto-download on first use; see "Input" below
|
|
42
|
+
|
|
43
|
+
res.boxes # (N, 4) xyxy, in original-frame coordinates
|
|
44
|
+
res.scores # (N,)
|
|
45
|
+
res.landmarks # (N, 5, 2)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**Input — what `frame` must be:** a NumPy array in **BGR** order, shape **`(H, W, 3)`**, dtype
|
|
49
|
+
**`uint8`** (OpenCV's native format — e.g. straight from `cv2.imread(...)` or
|
|
50
|
+
`cv2.VideoCapture(...).read()`), **or** a `torch.Tensor` of shape **`(3, H, W)`**. Any
|
|
51
|
+
resolution; the model letterboxes internally.
|
|
52
|
+
|
|
53
|
+
Drive a video file or a live stream (FPS/latency print to the terminal):
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
for frame_ref, res in det.run_source("video.mp4"): # a file
|
|
57
|
+
...
|
|
58
|
+
for frame_ref, res in det.run_source("rtsp://cam/stream", is_stream=True): # live
|
|
59
|
+
...
|
|
60
|
+
for frame_ref, res in det.run_source("video.mp4", is_stream=True): # file as a stream
|
|
61
|
+
...
|
|
62
|
+
# frame_ref.image is the BGR frame; res is the FaceFrameResult for that frame.
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Output —** `FaceFrameResult`: `boxes (N,4)` xyxy · `scores (N,)` · `landmarks (N,5,2)` ·
|
|
66
|
+
`frame_index` · `shape (H,W)`. Coordinates are in the **original** frame. `det.stats.as_dict()`
|
|
67
|
+
gives rolling fps / latency / per-stage timings.
|
|
68
|
+
|
|
69
|
+
### Or from the terminal
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# detect on a video file and show a window (boxes + landmarks + FPS; press q/ESC to quit)
|
|
73
|
+
online-face --source video.mp4 --device auto --runtime auto --conf 0.5 --nms 0.4 --display
|
|
74
|
+
|
|
75
|
+
# webcam (index 0) as a live stream
|
|
76
|
+
online-face --source 0 --device auto --runtime auto --stream --display
|
|
77
|
+
|
|
78
|
+
# headless: write an annotated mp4 instead of showing a window
|
|
79
|
+
online-face --source video.mp4 --device auto --runtime auto --save-video out.mp4
|
|
80
|
+
|
|
81
|
+
# discover weights, or see every flag
|
|
82
|
+
online-face --list-weights
|
|
83
|
+
online-face --help
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
`online-face` == `python -m online_face.cli.run`. All flags:
|
|
87
|
+
`--source` (file path | webcam index | rtsp/http url) · `--device {auto,cpu,cuda,mps}` ·
|
|
88
|
+
`--runtime {auto,torch,torchscript,onnx,trt}` · `--conf` · `--nms` · `--stream` · `--display` ·
|
|
89
|
+
`--save-video PATH` · `--max-frames N` · `--list-weights`.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Models & weights
|
|
94
|
+
|
|
95
|
+
`model` is the **family** (`retinaface` — the only one today); `weights` is the actual weight —
|
|
96
|
+
a known key (auto-downloaded) or a file path. `weights=None` uses the default.
|
|
97
|
+
|
|
98
|
+
| weights key | impl | exportable | notes |
|
|
99
|
+
|-------------|------|------------|-------|
|
|
100
|
+
| `mobilenet0.25` *(default)* | biubug6 | onnx / trt | light, edge-friendly; auto-downloads (~1.7 MB) |
|
|
101
|
+
| `resnet50` | biubug6 | onnx / trt | higher accuracy; auto-downloads (~109 MB, sha256-checked) |
|
|
102
|
+
| `ternaus_resnet50` | ternaus | torch-only | a convenience weight; works out of the box |
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
FaceDetector("retinaface", weights="mobilenet0.25") # default, auto-downloads
|
|
106
|
+
FaceDetector("retinaface", weights="/models/retinaface.onnx", runtime="onnx") # a ready artifact
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
**`resnet50` is auto-downloaded** (~109 MB, sha256-verified) from the official biubug6 mirror on
|
|
110
|
+
first use — nothing to do. If Google Drive ever rate-limits you, download `Resnet50_Final.pth` from
|
|
111
|
+
[biubug6/Pytorch_Retinaface](https://drive.google.com/file/d/14KX6VqF69MdSPk3Tr9PlDYbq7ArpdNUW/view) and pass the path:
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
FaceDetector("retinaface", weights="/path/to/Resnet50_Final.pth") # or --weights on the CLI/serve
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
…or drop it at `~/.cache/online_inference/weights/retinaface_resnet50.pth` and use `weights="resnet50"`.
|
|
118
|
+
(Keep `resnet`/`r50` in the filename — the arch is inferred from the name. The same applies to any
|
|
119
|
+
custom weight file.)
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## Runtimes & the export cache
|
|
124
|
+
|
|
125
|
+
`runtime="auto"` picks the best backend per device: **Jetson/CUDA → tensorrt** (else onnx-CUDA),
|
|
126
|
+
**macOS → torch (MPS)**, **CPU → onnx/torch**. The first time a non-torch runtime is used, the
|
|
127
|
+
artifact (torchscript / onnx / trt engine) is **built once and cached** under
|
|
128
|
+
`~/.cache/online_inference/` (override with `$ONLINE_INFERENCE_CACHE`); later runs load it.
|
|
129
|
+
TensorRT engines are keyed to the exact GPU/JetPack so they never load on the wrong device.
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Install options
|
|
134
|
+
|
|
135
|
+
`[torch]` is all most people need. Add extras for other backends. **Extras are additive** — if
|
|
136
|
+
you already installed `[torch]`, running `pip install "online-face-detection[serve]"` later just
|
|
137
|
+
adds those packages (it won't reinstall torch). You can also install several at once:
|
|
138
|
+
`pip install "online-face-detection[torch,onnx,serve]"`.
|
|
139
|
+
|
|
140
|
+
| Extra | Adds | Install when you want to… |
|
|
141
|
+
|-------|------|---------------------------|
|
|
142
|
+
| `[torch]` | torch, torchvision, retinaface-pytorch | **default** runtime (CPU / CUDA / MPS) |
|
|
143
|
+
| `[onnx]` | onnxruntime, onnx, onnxsim | run or export the ONNX backend |
|
|
144
|
+
| `[trt]` | tensorrt | build/run TensorRT engines (NVIDIA) |
|
|
145
|
+
| `[serve]` | fastapi, uvicorn, python-multipart | host the model as an HTTP service (below) |
|
|
146
|
+
| `[client]` | requests | call a remote service (torch-free, below) |
|
|
147
|
+
|
|
148
|
+
**Which do I actually need?**
|
|
149
|
+
- `pip install online-face-detection` (no `[...]`) → **core only** (numpy/opencv/tqdm); **no runtime, can't run inference**. Use this only when torch is provided another way (e.g. Jetson/JetPack wheels).
|
|
150
|
+
- `[torch]` → the **foundation**; required to run the model locally (CPU/CUDA/MPS). Start here.
|
|
151
|
+
- `[onnx]` / `[trt]` → **add** a backend *on top of* torch (they don't replace it). Install together: `pip install "online-face-detection[torch,onnx]"`.
|
|
152
|
+
- `[serve]` → runs the model in-process, so it needs torch too: `pip install "online-face-detection[torch,serve]"`.
|
|
153
|
+
- `[client]` → the **only torch-free** one — it just calls a remote service, so `pip install "online-face-detection[client]"` **alone is enough**.
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## (Optional) Serve it as an HTTP service
|
|
158
|
+
|
|
159
|
+
Besides the in-process use above, the model can run as its own HTTP service (local or cloud)
|
|
160
|
+
and be called by URL. Needs the `[serve]` extra (adds only fastapi/uvicorn on top of `[torch]`).
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
pip install "online-face-detection[torch,serve]"
|
|
164
|
+
online-face-serve --model retinaface --device auto --runtime auto --host 127.0.0.1 --port 8001
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
**Server flags** (all optional; defaults shown): `--model retinaface` ·
|
|
168
|
+
`--weights KEY|PATH` (default: family default) · `--device {auto,cpu,cuda,mps}` ·
|
|
169
|
+
`--runtime {auto,torch,torchscript,onnx,trt}` · `--precision {auto,fp32,fp16,int8}` ·
|
|
170
|
+
`--conf 0.5` · `--nms 0.4` · `--input-size N` · `--host 127.0.0.1` · `--port 8001`.
|
|
171
|
+
|
|
172
|
+
| Route | What it does |
|
|
173
|
+
|-------|--------------|
|
|
174
|
+
| `GET /meta` | self-describing: named, typed inputs/outputs (input `frame: image`; outputs `boxes/scores/landmarks`) |
|
|
175
|
+
| `GET /healthz` | readiness + resolved runtime/device |
|
|
176
|
+
| `POST /predict` | multipart with a `frame` image part → JSON `{outputs, stats}` |
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
curl http://127.0.0.1:8001/meta
|
|
180
|
+
curl -F 'frame=@frame.png;type=image/png' http://127.0.0.1:8001/predict
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
**Call it from another process** with the torch-free `[client]` proxy (mirrors `det(frame)`):
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
pip install "online-face-detection[client]"
|
|
187
|
+
```
|
|
188
|
+
```python
|
|
189
|
+
from online_face.client import FaceClient
|
|
190
|
+
|
|
191
|
+
face = FaceClient(
|
|
192
|
+
"http://127.0.0.1:8001", # the service URL (local or cloud)
|
|
193
|
+
encode="png", # how frames go over the wire: "png" (lossless) | "jpeg" (smaller)
|
|
194
|
+
timeout=30, # request timeout, seconds
|
|
195
|
+
)
|
|
196
|
+
res = face(frame) # same shape as det(frame): res.boxes / res.scores / res.landmarks
|
|
197
|
+
face.meta() # the service's /meta; face.healthz() -> readiness
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Compose two services into a pipeline (e.g. face → emotion) by URL — see
|
|
201
|
+
**[../testing-pipeline](../testing-pipeline)** for a ready-to-run example.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Misc
|
|
206
|
+
|
|
207
|
+
### Install with uv
|
|
208
|
+
|
|
209
|
+
Same as pip, with `uv`:
|
|
210
|
+
```bash
|
|
211
|
+
uv add "online-face-detection[torch]" # into a uv project
|
|
212
|
+
uv pip install "online-face-detection[torch]" # into the active venv
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Jetson (JetPack)
|
|
216
|
+
|
|
217
|
+
On Jetson the whole GPU stack (CUDA / cuDNN / TensorRT) is part of **JetPack**, and torch/onnxruntime
|
|
218
|
+
must be NVIDIA's Jetson wheels — the PyPI `[torch]`/`[onnx]` wheels are x86_64 and won't use the GPU.
|
|
219
|
+
|
|
220
|
+
**1. Pick a JetPack version.**
|
|
221
|
+
|
|
222
|
+
| Board | JetPack | Stack |
|
|
223
|
+
|-------|---------|-------|
|
|
224
|
+
| Orin (AGX/NX/Nano) | **6.x** | CUDA 12.6 · TensorRT 10.3 · PyTorch 2.6 wheel |
|
|
225
|
+
| Xavier / older | **5.1.x** | torch ~2.1 |
|
|
226
|
+
|
|
227
|
+
Both meet this package's `torch>=2.1` floor.
|
|
228
|
+
|
|
229
|
+
**2. Install these into the JetPack env first** — from NVIDIA's
|
|
230
|
+
[PyTorch for Jetson](https://forums.developer.nvidia.com/t/pytorch-for-jetson/72048) guide, or the
|
|
231
|
+
[jetson-ai-lab](https://pypi.jetson-ai-lab.io) wheel index matched to your JetPack (e.g.
|
|
232
|
+
`--index-url https://pypi.jetson-ai-lab.io/jp6/cu126` for JetPack 6.x):
|
|
233
|
+
|
|
234
|
+
- `torch`, `torchvision` — the **Jetson GPU wheels** (not from PyPI)
|
|
235
|
+
- `onnxruntime-gpu` — only if you'll use the ONNX backend
|
|
236
|
+
- `opencv-python`, `numpy` — usually already present in JetPack; install if missing
|
|
237
|
+
- TensorRT — **already installed by JetPack** (nothing to do)
|
|
238
|
+
|
|
239
|
+
**3. Then install this package with NO runtime extra**, so it uses the system ones:
|
|
240
|
+
|
|
241
|
+
```bash
|
|
242
|
+
pip install online-face-detection # no [torch] / [onnx]
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
It adapts to whatever JetPack provides and keys each cached TensorRT engine to the exact board.
|
|
246
|
+
|
|
247
|
+
> **Conflicting model requirements?** One Jetson has a single system torch/TRT. If two models need
|
|
248
|
+
> incompatible torch/CUDA, run each as its own [HTTP service](#optional-serve-it-as-an-http-service)
|
|
249
|
+
> (e.g. an `nvcr.io/nvidia/l4t-pytorch` container) and compose them by URL with the `[client]` proxy.
|
|
250
|
+
|
|
251
|
+
### Pre-build & cache an artifact
|
|
252
|
+
|
|
253
|
+
Optional — otherwise built on first use. Choose the runtime you'll deploy with for the target device:
|
|
254
|
+
```bash
|
|
255
|
+
online-face-export --model retinaface --weights mobilenet0.25 --runtime trt --device auto
|
|
256
|
+
```
|
|
257
|
+
Flags: `--model` · `--weights KEY|PATH` · `--runtime {torchscript,onnx,trt}` ·
|
|
258
|
+
`--device {auto,cpu,cuda,mps}` · `--precision {auto,fp32,fp16,int8}` · `--input-size N`.
|
|
259
|
+
|
|
260
|
+
## License
|
|
261
|
+
|
|
262
|
+
MIT © Surya Chand Rayala
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Minimal demo: frame-by-frame and stream usage with terminal FPS stats.
|
|
2
|
+
|
|
3
|
+
python examples/demo.py --source /path/to/video.mp4 --display
|
|
4
|
+
python examples/demo.py --source 0 --stream --display # webcam
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
|
|
10
|
+
from online_face import FaceDetector
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main() -> None:
    """Run the "retinaface" detector over a video file, webcam, or stream.

    Parses the command-line options, builds one ``FaceDetector``, prints its
    configuration summary, iterates ``det.run_source(...)`` over the requested
    source, and prints the accumulated stats once the source is exhausted.

    The detector is closed in a ``finally`` clause so it is released even when
    the run raises or is interrupted (e.g. Ctrl-C while displaying frames).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--source", required=True, help="video path | webcam index | rtsp/http url")
    ap.add_argument("--stream", action="store_true", help="treat source as a live stream")
    ap.add_argument("--display", action="store_true")
    ap.add_argument("--weights", default=None)
    ap.add_argument("--runtime", default="auto")
    ap.add_argument("--max-frames", type=int, default=None)
    args = ap.parse_args()

    # One detector object, reused frame-by-frame (this is what you embed in a pipeline).
    det = FaceDetector("retinaface", weights=args.weights, runtime=args.runtime, device="auto")
    try:
        print("config:", det.config.summary())

        # --stream forces stream handling; without it None is passed
        # (presumably letting run_source decide from the source -- confirm).
        is_stream = True if args.stream else None

        for _frame_ref, _result in det.run_source(
            args.source,
            is_stream=is_stream,
            display=args.display,
            max_frames=args.max_frames,
        ):
            # result.boxes (N,4), result.scores (N,), result.landmarks (N,5,2) in original-frame coords.
            pass

        print("final stats:", det.stats.as_dict())
    finally:
        # Always release the detector, including on errors and KeyboardInterrupt.
        det.close()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
if __name__ == "__main__":
|
|
37
|
+
main()
|