rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
# PoseDetector API
|
|
2
|
+
|
|
3
|
+
High-performance unified API for real-time person detection and pose estimation.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
`PoseDetector` combines YOLO12 object detection with RTMPose pose estimation in a single, optimized interface. Designed for speed and ease of use with convenient methods for web elements.
|
|
8
|
+
|
|
9
|
+
**Models are loaded automatically from HuggingFace if not specified.**
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install rtmlib-ts
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
### Default Models (Auto-loaded)
|
|
20
|
+
|
|
21
|
+
```typescript
|
|
22
|
+
import { PoseDetector } from 'rtmlib-ts';
|
|
23
|
+
|
|
24
|
+
// Initialize with default models from HuggingFace
|
|
25
|
+
const detector = new PoseDetector();
|
|
26
|
+
await detector.init();
|
|
27
|
+
|
|
28
|
+
const canvas = document.getElementById('canvas') as HTMLCanvasElement;
|
|
29
|
+
const people = await detector.detectFromCanvas(canvas);
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### From Canvas
|
|
33
|
+
|
|
34
|
+
```typescript
|
|
35
|
+
import { PoseDetector } from 'rtmlib-ts';
|
|
36
|
+
|
|
37
|
+
const detector = new PoseDetector({
|
|
38
|
+
detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
|
|
39
|
+
poseModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/rtmpose/end2end.onnx',
|
|
40
|
+
});
|
|
41
|
+
await detector.init();
|
|
42
|
+
|
|
43
|
+
const canvas = document.getElementById('canvas') as HTMLCanvasElement;
|
|
44
|
+
const people = await detector.detectFromCanvas(canvas);
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### From Video (Real-time)
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
const video = document.getElementById('video') as HTMLVideoElement;
|
|
51
|
+
const people = await detector.detectFromVideo(video);
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### From Image Element
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
const img = document.getElementById('image') as HTMLImageElement;
|
|
58
|
+
const people = await detector.detectFromImage(img);
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### From File Upload
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
const fileInput = document.getElementById('file') as HTMLInputElement;
|
|
65
|
+
fileInput.addEventListener('change', async (e) => {
|
|
66
|
+
const file = (e.target as HTMLInputElement).files?.[0];
|
|
67
|
+
if (file) {
|
|
68
|
+
const people = await detector.detectFromFile(file);
|
|
69
|
+
}
|
|
70
|
+
});
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### From Camera (Blob)
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
|
|
77
|
+
const video = document.querySelector('video');
|
|
78
|
+
video.srcObject = stream;
|
|
79
|
+
|
|
80
|
+
video.addEventListener('play', async () => {
|
|
81
|
+
const people = await detector.detectFromVideo(video);
|
|
82
|
+
});
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## API Reference
|
|
86
|
+
|
|
87
|
+
### Constructor
|
|
88
|
+
|
|
89
|
+
```typescript
|
|
90
|
+
new PoseDetector(config?: PoseDetectorConfig)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
**Configuration Options:**
|
|
94
|
+
|
|
95
|
+
| Option | Type | Default | Description |
|
|
96
|
+
|--------|------|---------|-------------|
|
|
97
|
+
| `detModel` | `string` | optional | Path to YOLO12 detection model |
|
|
98
|
+
| `poseModel` | `string` | optional | Path to RTMPose pose model |
|
|
99
|
+
| `detInputSize` | `[number, number]` | `[416, 416]` | Detection input size |
|
|
100
|
+
| `poseInputSize` | `[number, number]` | `[384, 288]` | Pose input size |
|
|
101
|
+
| `detConfidence` | `number` | `0.5` | Detection confidence threshold |
|
|
102
|
+
| `nmsThreshold` | `number` | `0.45` | NMS IoU threshold |
|
|
103
|
+
| `poseConfidence` | `number` | `0.3` | Keypoint visibility threshold |
|
|
104
|
+
| `backend` | `'wasm' \| 'webgpu'` | `'wasm'` | Execution backend |
|
|
105
|
+
| `cache` | `boolean` | `true` | Enable model caching |
|
|
106
|
+
|
|
107
|
+
### Default Models
|
|
108
|
+
|
|
109
|
+
If `detModel` and `poseModel` are not specified, the following default models are used:
|
|
110
|
+
|
|
111
|
+
- **Detector**: `https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx`
|
|
112
|
+
- **Pose**: `https://huggingface.co/demon2233/rtmlib-ts/resolve/main/rtmpose/end2end.onnx`
|
|
113
|
+
|
|
114
|
+
### Methods
|
|
115
|
+
|
|
116
|
+
#### `init()`
|
|
117
|
+
|
|
118
|
+
Initialize both detection and pose models.
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
await detector.init();
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
#### `detectFromCanvas()`
|
|
125
|
+
|
|
126
|
+
Detect poses from HTMLCanvasElement.
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
async detectFromCanvas(canvas: HTMLCanvasElement): Promise<Person[]>
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
#### `detectFromVideo()`
|
|
133
|
+
|
|
134
|
+
Detect poses from HTMLVideoElement (for real-time video processing).
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
async detectFromVideo(
|
|
138
|
+
video: HTMLVideoElement,
|
|
139
|
+
targetCanvas?: HTMLCanvasElement
|
|
140
|
+
): Promise<Person[]>
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
#### `detectFromImage()`
|
|
144
|
+
|
|
145
|
+
Detect poses from HTMLImageElement.
|
|
146
|
+
|
|
147
|
+
```typescript
|
|
148
|
+
async detectFromImage(
|
|
149
|
+
image: HTMLImageElement,
|
|
150
|
+
targetCanvas?: HTMLCanvasElement
|
|
151
|
+
): Promise<Person[]>
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
#### `detectFromFile()`
|
|
155
|
+
|
|
156
|
+
Detect poses from File object (for file uploads).
|
|
157
|
+
|
|
158
|
+
```typescript
|
|
159
|
+
async detectFromFile(
|
|
160
|
+
file: File,
|
|
161
|
+
targetCanvas?: HTMLCanvasElement
|
|
162
|
+
): Promise<Person[]>
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
#### `detectFromBlob()`
|
|
166
|
+
|
|
167
|
+
Detect poses from Blob (for camera capture or downloads).
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
async detectFromBlob(
|
|
171
|
+
blob: Blob,
|
|
172
|
+
targetCanvas?: HTMLCanvasElement
|
|
173
|
+
): Promise<Person[]>
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
#### `detect()`
|
|
177
|
+
|
|
178
|
+
Low-level method for raw image data.
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
async detect(
|
|
182
|
+
imageData: Uint8Array,
|
|
183
|
+
width: number,
|
|
184
|
+
height: number
|
|
185
|
+
): Promise<Person[]>
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
#### `dispose()`
|
|
189
|
+
|
|
190
|
+
Release resources and models.
|
|
191
|
+
|
|
192
|
+
```typescript
|
|
193
|
+
detector.dispose();
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Types
|
|
197
|
+
|
|
198
|
+
#### `Person`
|
|
199
|
+
|
|
200
|
+
```typescript
|
|
201
|
+
interface Person {
|
|
202
|
+
bbox: {
|
|
203
|
+
x1: number;
|
|
204
|
+
y1: number;
|
|
205
|
+
x2: number;
|
|
206
|
+
y2: number;
|
|
207
|
+
confidence: number;
|
|
208
|
+
};
|
|
209
|
+
keypoints: Keypoint[];
|
|
210
|
+
scores: number[];
|
|
211
|
+
}
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
#### `Keypoint`
|
|
215
|
+
|
|
216
|
+
```typescript
|
|
217
|
+
interface Keypoint {
|
|
218
|
+
x: number;
|
|
219
|
+
y: number;
|
|
220
|
+
score: number;
|
|
221
|
+
visible: boolean;
|
|
222
|
+
name: string;
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
**Keypoint Names (COCO17):**
|
|
227
|
+
0. `nose`
|
|
228
|
+
1. `left_eye`
|
|
229
|
+
2. `right_eye`
|
|
230
|
+
3. `left_ear`
|
|
231
|
+
4. `right_ear`
|
|
232
|
+
5. `left_shoulder`
|
|
233
|
+
6. `right_shoulder`
|
|
234
|
+
7. `left_elbow`
|
|
235
|
+
8. `right_elbow`
|
|
236
|
+
9. `left_wrist`
|
|
237
|
+
10. `right_wrist`
|
|
238
|
+
11. `left_hip`
|
|
239
|
+
12. `right_hip`
|
|
240
|
+
13. `left_knee`
|
|
241
|
+
14. `right_knee`
|
|
242
|
+
15. `left_ankle`
|
|
243
|
+
16. `right_ankle`
|
|
244
|
+
|
|
245
|
+
#### `PoseStats`
|
|
246
|
+
|
|
247
|
+
Performance statistics attached to results:
|
|
248
|
+
|
|
249
|
+
```typescript
|
|
250
|
+
interface PoseStats {
|
|
251
|
+
personCount: number;
|
|
252
|
+
detTime: number; // Detection time (ms)
|
|
253
|
+
poseTime: number; // Pose estimation time (ms)
|
|
254
|
+
totalTime: number; // Total processing time (ms)
|
|
255
|
+
}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
Access via: `(people as any).stats`
|
|
259
|
+
|
|
260
|
+
## Performance Optimization
|
|
261
|
+
|
|
262
|
+
### 1. Use WebGPU Backend (if available)
|
|
263
|
+
|
|
264
|
+
```typescript
|
|
265
|
+
const detector = new PoseDetector({
|
|
266
|
+
detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
|
|
267
|
+
poseModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/rtmpose/end2end.onnx',
|
|
268
|
+
backend: 'webgpu', // Faster than WASM
|
|
269
|
+
});
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### 2. Adjust Input Sizes
|
|
273
|
+
|
|
274
|
+
Smaller input sizes = faster inference:
|
|
275
|
+
|
|
276
|
+
```typescript
|
|
277
|
+
// Fast (lower accuracy)
|
|
278
|
+
const detector = new PoseDetector({
|
|
279
|
+
detInputSize: [416, 416],
|
|
280
|
+
poseInputSize: [256, 192],
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
// Balanced
|
|
284
|
+
const detector = new PoseDetector({
|
|
285
|
+
detInputSize: [640, 640],
|
|
286
|
+
poseInputSize: [384, 288],
|
|
287
|
+
});
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
### 3. Tune Confidence Thresholds
|
|
291
|
+
|
|
292
|
+
Higher thresholds = fewer detections but faster:
|
|
293
|
+
|
|
294
|
+
```typescript
|
|
295
|
+
const detector = new PoseDetector({
|
|
296
|
+
detConfidence: 0.6, // Skip low-confidence detections
|
|
297
|
+
poseConfidence: 0.4, // Only show confident keypoints
|
|
298
|
+
});
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### 4. Reuse Detector Instance
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
// ✅ Create the detector once — do NOT construct a new PoseDetector per frame
|
|
305
|
+
const detector = new PoseDetector(config);
|
|
306
|
+
|
|
307
|
+
// ✅ Reuse same instance
|
|
308
|
+
for (const frame of videoFrames) {
|
|
309
|
+
const people = await detector.detect(frame.data, frame.width, frame.height);
|
|
310
|
+
}
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
### 5. Batch Processing (for multiple images)
|
|
314
|
+
|
|
315
|
+
```typescript
|
|
316
|
+
// Process images concurrently with the same detector instance
|
|
317
|
+
const detector = new PoseDetector(config);
|
|
318
|
+
await detector.init();
|
|
319
|
+
|
|
320
|
+
const results = await Promise.all(
|
|
321
|
+
images.map(img => detector.detect(img.data, img.width, img.height))
|
|
322
|
+
);
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
## Complete Example
|
|
326
|
+
|
|
327
|
+
```typescript
|
|
328
|
+
import { PoseDetector } from 'rtmlib-ts';
|
|
329
|
+
|
|
330
|
+
async function main() {
|
|
331
|
+
// Initialize
|
|
332
|
+
const detector = new PoseDetector({
|
|
333
|
+
detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
|
|
334
|
+
poseModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/rtmpose/end2end.onnx',
|
|
335
|
+
detInputSize: [640, 640],
|
|
336
|
+
poseInputSize: [384, 288],
|
|
337
|
+
detConfidence: 0.5,
|
|
338
|
+
nmsThreshold: 0.45,
|
|
339
|
+
poseConfidence: 0.3,
|
|
340
|
+
backend: 'wasm',
|
|
341
|
+
});
|
|
342
|
+
|
|
343
|
+
await detector.init();
|
|
344
|
+
|
|
345
|
+
// Load image
|
|
346
|
+
const response = await fetch('image.jpg');
|
|
347
|
+
const blob = await response.blob();
|
|
348
|
+
const imageBitmap = await createImageBitmap(blob);
|
|
349
|
+
|
|
350
|
+
const canvas = document.createElement('canvas');
|
|
351
|
+
canvas.width = imageBitmap.width;
|
|
352
|
+
canvas.height = imageBitmap.height;
|
|
353
|
+
const ctx = canvas.getContext('2d')!;
|
|
354
|
+
ctx.drawImage(imageBitmap, 0, 0);
|
|
355
|
+
|
|
356
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
357
|
+
const data = new Uint8Array(imageData.data);
|
|
358
|
+
|
|
359
|
+
// Detect
|
|
360
|
+
const startTime = performance.now();
|
|
361
|
+
const people = await detector.detect(data, canvas.width, canvas.height);
|
|
362
|
+
const endTime = performance.now();
|
|
363
|
+
|
|
364
|
+
// Print stats
|
|
365
|
+
const stats = (people as any).stats;
|
|
366
|
+
console.log(`Detected ${stats.personCount} people in ${stats.totalTime}ms`);
|
|
367
|
+
console.log(` Detection: ${stats.detTime}ms`);
|
|
368
|
+
console.log(` Pose: ${stats.poseTime}ms`);
|
|
369
|
+
|
|
370
|
+
// Draw results
|
|
371
|
+
people.forEach((person, i) => {
|
|
372
|
+
// Draw bounding box
|
|
373
|
+
ctx.strokeStyle = `hsl(${i * 60}, 80%, 50%)`;
|
|
374
|
+
ctx.lineWidth = 2;
|
|
375
|
+
ctx.strokeRect(
|
|
376
|
+
person.bbox.x1,
|
|
377
|
+
person.bbox.y1,
|
|
378
|
+
person.bbox.x2 - person.bbox.x1,
|
|
379
|
+
person.bbox.y2 - person.bbox.y1
|
|
380
|
+
);
|
|
381
|
+
|
|
382
|
+
// Draw keypoints
|
|
383
|
+
person.keypoints.forEach(kp => {
|
|
384
|
+
if (!kp.visible) return;
|
|
385
|
+
|
|
386
|
+
ctx.fillStyle = '#00ff00';
|
|
387
|
+
ctx.beginPath();
|
|
388
|
+
ctx.arc(kp.x, kp.y, 4, 0, Math.PI * 2);
|
|
389
|
+
ctx.fill();
|
|
390
|
+
});
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
// Cleanup
|
|
394
|
+
detector.dispose();
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
main();
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
## Browser Support
|
|
401
|
+
|
|
402
|
+
| Browser | Version | Backend |
|
|
403
|
+
|---------|---------|---------|
|
|
404
|
+
| Chrome | 94+ | WASM, WebGPU |
|
|
405
|
+
| Edge | 94+ | WASM, WebGPU |
|
|
406
|
+
| Firefox | 95+ | WASM |
|
|
407
|
+
| Safari | 16.4+ | WASM |
|
|
408
|
+
|
|
409
|
+
## Performance Benchmarks
|
|
410
|
+
|
|
411
|
+
Typical inference times on M1 MacBook Pro:
|
|
412
|
+
|
|
413
|
+
| Configuration | Detection | Pose (per person) | Total (3 people) |
|
|
414
|
+
|--------------|-----------|-------------------|------------------|
|
|
415
|
+
| WASM, 640×640 | 80ms | 25ms | 155ms |
|
|
416
|
+
| WASM, 416×416 | 40ms | 15ms | 85ms |
|
|
417
|
+
| WebGPU, 640×640 | 30ms | 10ms | 60ms |
|
|
418
|
+
|
|
419
|
+
## Troubleshooting
|
|
420
|
+
|
|
421
|
+
### "Model loading failed"
|
|
422
|
+
|
|
423
|
+
- Ensure models are accessible via HTTP (not `file://` protocol)
|
|
424
|
+
- Use a local server: `python -m http.server 8080`
|
|
425
|
+
- Check CORS headers
|
|
426
|
+
|
|
427
|
+
### "Slow inference"
|
|
428
|
+
|
|
429
|
+
- Switch to WebGPU backend if available
|
|
430
|
+
- Reduce input sizes
|
|
431
|
+
- Increase confidence thresholds
|
|
432
|
+
- Process every Nth frame instead of all frames
|
|
433
|
+
|
|
434
|
+
### "No detections"
|
|
435
|
+
|
|
436
|
+
- Lower `detConfidence` threshold
|
|
437
|
+
- Ensure person is visible and reasonably sized
|
|
438
|
+
- Check image format (RGB, not grayscale)
|
|
439
|
+
|
|
440
|
+
## License
|
|
441
|
+
|
|
442
|
+
Apache 2.0
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# YOLO12 Person Detection - Web Demo
|
|
2
|
+
|
|
3
|
+
Real-time person detection using YOLO12 and ONNX Runtime Web.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
### Option 1: Using Python HTTP Server
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# From the rtmlib-ts directory
|
|
11
|
+
python -m http.server 8080 --directory examples
|
|
12
|
+
|
|
13
|
+
# Open in browser
|
|
14
|
+
http://localhost:8080/index.html
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
### Option 2: Using Node.js (http-server)
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Install http-server globally
|
|
21
|
+
npm install -g http-server
|
|
22
|
+
|
|
23
|
+
# Run server
|
|
24
|
+
http-server examples -p 8080
|
|
25
|
+
|
|
26
|
+
# Open in browser
|
|
27
|
+
http://localhost:8080/index.html
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Option 3: Using VS Code Live Server
|
|
31
|
+
|
|
32
|
+
1. Install "Live Server" extension in VS Code
|
|
33
|
+
2. Right-click on `examples/index.html`
|
|
34
|
+
3. Select "Open with Live Server"
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- 🎯 **Person Detection**: Detects people in images using YOLO12n model
|
|
39
|
+
- 🚀 **Fast Inference**: Runs entirely in the browser using WebAssembly
|
|
40
|
+
- 📊 **Real-time Stats**: Shows detection count and inference time
|
|
41
|
+
- 🎨 **Visual Results**: Bounding boxes with confidence scores
|
|
42
|
+
- ⚙️ **Adjustable Threshold**: Control detection confidence
|
|
43
|
+
|
|
44
|
+
## How It Works
|
|
45
|
+
|
|
46
|
+
1. **Model Loading**: The YOLO12n ONNX model is loaded via fetch
|
|
47
|
+
2. **Image Upload**: User uploads an image via drag-drop or file picker
|
|
48
|
+
3. **Preprocessing**: Image is resized with letterbox padding (black background)
|
|
49
|
+
4. **Inference**: ONNX Runtime Web runs the model using WebAssembly
|
|
50
|
+
5. **Postprocessing**: Filter by confidence, transform coordinates, apply NMS
|
|
51
|
+
6. **Visualization**: Draw bounding boxes on canvas
|
|
52
|
+
|
|
53
|
+
## Model
|
|
54
|
+
|
|
55
|
+
- **Name**: YOLO12n (Nano)
|
|
56
|
+
- **Input Size**: 640x640
|
|
57
|
+
- **Classes**: 80 COCO classes (we filter for class 0 = person)
|
|
58
|
+
- **Size**: ~11 MB
|
|
59
|
+
- **Source**: Ultralytics
|
|
60
|
+
|
|
61
|
+
## Browser Support
|
|
62
|
+
|
|
63
|
+
- Chrome 94+ (WebAssembly SIMD)
|
|
64
|
+
- Firefox 95+
|
|
65
|
+
- Safari 16.4+
|
|
66
|
+
- Edge 94+
|
|
67
|
+
|
|
68
|
+
## File Structure
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
examples/
|
|
72
|
+
├── index.html # Main web demo page
|
|
73
|
+
├── models/
|
|
74
|
+
│ └── yolov12n.onnx # YOLO12 model
|
|
75
|
+
└── 8.png # Sample image
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## API Usage (TypeScript)
|
|
79
|
+
|
|
80
|
+
```typescript
|
|
81
|
+
import { YOLO12 } from './dist/index.js';
|
|
82
|
+
|
|
83
|
+
// Initialize detector
|
|
84
|
+
const detector = new YOLO12(
|
|
85
|
+
'models/yolov12n.onnx',
|
|
86
|
+
[640, 640], // input size
|
|
87
|
+
0.45, // NMS threshold
|
|
88
|
+
0.5 // confidence threshold
|
|
89
|
+
);
|
|
90
|
+
|
|
91
|
+
await detector.init();
|
|
92
|
+
|
|
93
|
+
// Detect people in image
|
|
94
|
+
const imageData = ...; // Uint8Array RGB image
|
|
95
|
+
const width = 640;
|
|
96
|
+
const height = 480;
|
|
97
|
+
|
|
98
|
+
const detections = await detector.call(imageData, width, height);
|
|
99
|
+
|
|
100
|
+
// detections: Detection[]
|
|
101
|
+
// Each detection has: bbox, score, classId
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Troubleshooting
|
|
105
|
+
|
|
106
|
+
### Model fails to load
|
|
107
|
+
- Ensure the server is running (not just opening file://)
|
|
108
|
+
- Check browser console for CORS errors
|
|
109
|
+
- Verify model file exists in `examples/models/`
|
|
110
|
+
|
|
111
|
+
### Slow inference
|
|
112
|
+
- First run includes WASM compilation (subsequent runs are faster)
|
|
113
|
+
- Use Chrome/Edge for best WebAssembly performance
|
|
114
|
+
- Reduce image size for faster processing
|
|
115
|
+
|
|
116
|
+
### No detections
|
|
117
|
+
- Lower the confidence threshold
|
|
118
|
+
- Ensure person is visible in the image
|
|
119
|
+
- Check that the image format is supported (PNG, JPG, WebP)
|