rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,458 @@
1
+ # Pose3DDetector API
2
+
3
+ High-performance 3D pose estimation with YOLOX detector and RTMW3D pose model.
4
+
5
+ ## Overview
6
+
7
+ `Pose3DDetector` combines YOLOX object detection with RTMW3D 3D pose estimation for full-body 3D keypoint detection. This class provides 3D coordinates (x, y, z) for each keypoint instead of just 2D.
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ npm install rtmlib-ts
13
+ ```
14
+
15
+ ## Quick Start
16
+
17
+ ### Basic Usage
18
+
19
+ ```typescript
20
+ import { Pose3DDetector } from 'rtmlib-ts';
21
+
22
+ // Initialize with default models (from HuggingFace)
23
+ const detector = new Pose3DDetector();
24
+ await detector.init();
25
+
26
+ // Detect from canvas
27
+ const result = await detector.detectFromCanvas(canvas);
28
+
29
+ // Access 3D keypoints
30
+ console.log(`Detected ${result.keypoints.length} people`);
31
+ console.log(`3D keypoints: ${result.keypoints[0][0]}`); // [x, y, z] in meters
32
+ ```
33
+
34
+ ### From Canvas
35
+
36
+ ```typescript
37
+ const canvas = document.getElementById('canvas') as HTMLCanvasElement;
38
+ const result = await detector.detectFromCanvas(canvas);
39
+
40
+ result.keypoints.forEach((person, i) => {
41
+ person.forEach((kpt, j) => {
42
+ console.log(`Person ${i}, Keypoint ${j}: x=${kpt[0]}, y=${kpt[1]}, z=${kpt[2]}`);
43
+ });
44
+ });
45
+ ```
46
+
47
+ ### From Video (Real-time)
48
+
49
+ ```typescript
50
+ const video = document.getElementById('video') as HTMLVideoElement;
51
+
52
+ video.addEventListener('play', async () => {
53
+ while (!video.paused && !video.ended) {
54
+ const result = await detector.detectFromVideo(video);
55
+
56
+ // Process 3D keypoints
57
+ result.keypoints.forEach((person, i) => {
58
+ person.forEach((kpt, j) => {
59
+ // kpt = [x, y, z]
60
+ console.log(`Kpt ${j}: [${kpt[0].toFixed(2)}, ${kpt[1].toFixed(2)}, ${kpt[2].toFixed(2)}]`);
61
+ });
62
+ });
63
+
64
+ await new Promise(resolve => requestAnimationFrame(resolve));
65
+ }
66
+ });
67
+ ```
68
+
69
+ ### From Image Element
70
+
71
+ ```typescript
72
+ const img = document.getElementById('image') as HTMLImageElement;
73
+ const result = await detector.detectFromImage(img);
74
+ ```
75
+
76
+ ### From File Upload
77
+
78
+ ```typescript
79
+ const fileInput = document.getElementById('file') as HTMLInputElement;
80
+ fileInput.addEventListener('change', async (e) => {
81
+ const file = (e.target as HTMLInputElement).files?.[0];
82
+ if (file) {
83
+ const result = await detector.detectFromFile(file);
84
+ }
85
+ });
86
+ ```
87
+
88
+ ### From Camera (MediaStream)
89
+
90
+ ```typescript
91
+ const stream = await navigator.mediaDevices.getUserMedia({ video: true });
92
+ const video = document.querySelector('video');
93
+ video.srcObject = stream;
94
+
95
+ video.addEventListener('play', async () => {
96
+ const result = await detector.detectFromVideo(video);
97
+ });
98
+ ```
99
+
100
+ ## API Reference
101
+
102
+ ### Constructor
103
+
104
+ ```typescript
105
+ new Pose3DDetector(config?: Pose3DDetectorConfig)
106
+ ```
107
+
108
+ **Configuration Options:**
109
+
110
+ | Option | Type | Default | Description |
111
+ |--------|------|---------|-------------|
112
+ | `detModel` | `string` | optional | Path to YOLOX detection model |
113
+ | `poseModel` | `string` | optional | Path to RTMW3D pose model |
114
+ | `detInputSize` | `[number, number]` | `[640, 640]` | Detection input size |
115
+ | `poseInputSize` | `[number, number]` | `[384, 288]` | Pose input size |
116
+ | `detConfidence` | `number` | `0.45` | Detection confidence threshold |
117
+ | `nmsThreshold` | `number` | `0.7` | NMS IoU threshold |
118
+ | `poseConfidence` | `number` | `0.3` | Keypoint visibility threshold |
119
+ | `backend` | `'wasm' \| 'webgpu'` | `'wasm'` | Execution backend |
120
+ | `cache` | `boolean` | `true` | Enable model caching |
121
+ | `zRange` | `number` | `2.1744869` | Z-axis range in meters |
122
+
123
+ ### Default Models
124
+
125
+ If `detModel` and `poseModel` are not specified, the following default models are used:
126
+
127
+ - **Detector** (YOLO12-n; pass `detModel` to use a YOLOX model instead): `https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx`
128
+ - **Pose**: `https://huggingface.co/Soykaf/RTMW3D-x/resolve/main/onnx/rtmw3d-x_8xb64_cocktail14-384x288-b0a0eab7_20240626.onnx`
129
+
130
+ ### Methods
131
+
132
+ #### `init()`
133
+
134
+ Initialize both detection and pose models.
135
+
136
+ ```typescript
137
+ await detector.init();
138
+ ```
139
+
140
+ #### `detectFromCanvas()`
141
+
142
+ Detect 3D poses from HTMLCanvasElement.
143
+
144
+ ```typescript
145
+ async detectFromCanvas(canvas: HTMLCanvasElement): Promise<Wholebody3DResult>
146
+ ```
147
+
148
+ #### `detectFromVideo()`
149
+
150
+ Detect 3D poses from HTMLVideoElement (for real-time video processing).
151
+
152
+ ```typescript
153
+ async detectFromVideo(
154
+ video: HTMLVideoElement,
155
+ targetCanvas?: HTMLCanvasElement
156
+ ): Promise<Wholebody3DResult>
157
+ ```
158
+
159
+ #### `detectFromImage()`
160
+
161
+ Detect 3D poses from HTMLImageElement.
162
+
163
+ ```typescript
164
+ async detectFromImage(
165
+ image: HTMLImageElement,
166
+ targetCanvas?: HTMLCanvasElement
167
+ ): Promise<Wholebody3DResult>
168
+ ```
169
+
170
+ #### `detectFromFile()`
171
+
172
+ Detect 3D poses from File object (for file uploads).
173
+
174
+ ```typescript
175
+ async detectFromFile(
176
+ file: File,
177
+ targetCanvas?: HTMLCanvasElement
178
+ ): Promise<Wholebody3DResult>
179
+ ```
180
+
181
+ #### `detectFromBlob()`
182
+
183
+ Detect 3D poses from Blob (for camera capture or downloads).
184
+
185
+ ```typescript
186
+ async detectFromBlob(
187
+ blob: Blob,
188
+ targetCanvas?: HTMLCanvasElement
189
+ ): Promise<Wholebody3DResult>
190
+ ```
191
+
192
+ #### `detect()`
193
+
194
+ Low-level method for raw image data.
195
+
196
+ ```typescript
197
+ async detect(
198
+ imageData: Uint8Array,
199
+ width: number,
200
+ height: number
201
+ ): Promise<Wholebody3DResult>
202
+ ```
203
+
204
+ #### `dispose()`
205
+
206
+ Release resources and models.
207
+
208
+ ```typescript
209
+ detector.dispose();
210
+ ```
211
+
212
+ ### Types
213
+
214
+ #### `Wholebody3DResult`
215
+
216
+ ```typescript
217
+ interface Wholebody3DResult {
218
+ keypoints: number[][][]; // [numPeople][numKeypoints][3] - 3D coordinates
219
+ scores: number[][]; // [numPeople][numKeypoints] - confidence scores
220
+ keypointsSimcc: number[][][]; // [numPeople][numKeypoints][3] - normalized SimCC coords
221
+ keypoints2d: number[][][]; // [numPeople][numKeypoints][2] - 2D projection
222
+ }
223
+ ```
224
+
225
+ #### `Pose3DStats`
226
+
227
+ Performance statistics attached to results:
228
+
229
+ ```typescript
230
+ interface Pose3DStats {
231
+ personCount: number;
232
+ detTime: number; // Detection time (ms)
233
+ poseTime: number; // Pose estimation time (ms)
234
+ totalTime: number; // Total processing time (ms)
235
+ }
236
+ ```
237
+
238
+ Access via: `(result as any).stats`
239
+
240
+ ### Keypoint Structure
241
+
242
+ The model outputs 17 COCO keypoints per person:
243
+
244
+ | Index | Name | 3D Output |
245
+ |-------|------|-----------|
246
+ | 0 | nose | `[x, y, z]` |
247
+ | 1 | left_eye | `[x, y, z]` |
248
+ | 2 | right_eye | `[x, y, z]` |
249
+ | 3 | left_ear | `[x, y, z]` |
250
+ | 4 | right_ear | `[x, y, z]` |
251
+ | 5 | left_shoulder | `[x, y, z]` |
252
+ | 6 | right_shoulder | `[x, y, z]` |
253
+ | 7 | left_elbow | `[x, y, z]` |
254
+ | 8 | right_elbow | `[x, y, z]` |
255
+ | 9 | left_wrist | `[x, y, z]` |
256
+ | 10 | right_wrist | `[x, y, z]` |
257
+ | 11 | left_hip | `[x, y, z]` |
258
+ | 12 | right_hip | `[x, y, z]` |
259
+ | 13 | left_knee | `[x, y, z]` |
260
+ | 14 | right_knee | `[x, y, z]` |
261
+ | 15 | left_ankle | `[x, y, z]` |
262
+ | 16 | right_ankle | `[x, y, z]` |
263
+
264
+ ## Complete Example
265
+
266
+ ```typescript
267
+ import { Pose3DDetector } from 'rtmlib-ts';
268
+
269
+ async function main() {
270
+ // Initialize with default models
271
+ const detector = new Pose3DDetector();
272
+ console.log('Loading models...');
273
+ await detector.init();
274
+ console.log('Models loaded!');
275
+
276
+ // Load image
277
+ const img = new Image();
278
+ img.src = 'person.jpg';
279
+ await new Promise(resolve => img.onload = resolve);
280
+
281
+ const canvas = document.createElement('canvas');
282
+ canvas.width = img.width;
283
+ canvas.height = img.height;
284
+ const ctx = canvas.getContext('2d')!;
285
+ ctx.drawImage(img, 0, 0);
286
+
287
+ // Detect 3D poses
288
+ const startTime = performance.now();
289
+ const result = await detector.detectFromCanvas(canvas);
290
+ const endTime = performance.now();
291
+
292
+ const stats = (result as any).stats;
293
+ console.log(`Detected ${stats.personCount} people in ${stats.totalTime}ms`);
294
+ console.log(` Detection: ${stats.detTime}ms`);
295
+ console.log(` 3D Pose: ${stats.poseTime}ms`);
296
+
297
+ // Process 3D results
298
+ result.keypoints.forEach((person, personIdx) => {
299
+ console.log(`\nPerson ${personIdx + 1}:`);
300
+ person.forEach((kpt, kptIdx) => {
301
+ const score = result.scores[personIdx][kptIdx];
302
+ if (score > 0.5) {
303
+ console.log(
304
+ ` Keypoint ${kptIdx}: [${kpt[0].toFixed(3)}, ${kpt[1].toFixed(3)}, ${kpt[2].toFixed(3)}] ` +
305
+ `(score: ${score.toFixed(3)})`
306
+ );
307
+ }
308
+ });
309
+ });
310
+
311
+ // Draw 2D projection on canvas
312
+ result.keypoints2d.forEach((person, personIdx) => {
313
+ const color = `hsl(${personIdx * 60}, 80%, 50%)`;
314
+
315
+ person.forEach(([x, y], kptIdx) => {
316
+ const score = result.scores[personIdx][kptIdx];
317
+ if (score > 0.5) {
318
+ ctx.fillStyle = color;
319
+ ctx.beginPath();
320
+ ctx.arc(x, y, 5, 0, Math.PI * 2);
321
+ ctx.fill();
322
+ }
323
+ });
324
+ });
325
+
326
+ // Display result
327
+ document.body.appendChild(canvas);
328
+ }
329
+
330
+ main();
331
+ ```
332
+
333
+ ## Performance Optimization
334
+
335
+ ### 1. Use WebGPU Backend (if available)
336
+
337
+ ```typescript
338
+ const detector = new Pose3DDetector({
339
+ backend: 'webgpu', // Faster than WASM
340
+ });
341
+ ```
342
+
343
+ ### 2. Adjust Input Sizes
344
+
345
+ Smaller input sizes = faster inference:
346
+
347
+ ```typescript
348
+ const detector = new Pose3DDetector({
349
+ detInputSize: [416, 416], // Faster detection
350
+ poseInputSize: [256, 192], // Faster pose estimation
351
+ });
352
+ ```
353
+
354
+ ### 3. Tune Confidence Thresholds
355
+
356
+ Higher thresholds = fewer detections, but faster processing:
357
+
358
+ ```typescript
359
+ const detector = new Pose3DDetector({
360
+ detConfidence: 0.6, // Skip low-confidence detections
361
+ poseConfidence: 0.4, // Only show confident keypoints
362
+ });
363
+ ```
364
+
365
+ ### 4. Reuse Detector Instance
366
+
367
+ ```typescript
368
+ // ✅ Reuse same instance for multiple frames
369
+ const detector = new Pose3DDetector();
370
+ await detector.init();
371
+
372
+ for (const frame of videoFrames) {
373
+ const result = await detector.detect(frame.data, frame.width, frame.height);
374
+ }
375
+ ```
376
+
377
+ ### 5. Process Every Nth Frame
378
+
379
+ For real-time video, process every few frames:
380
+
381
+ ```typescript
382
+ let frameCount = 0;
383
+ const processEvery = 3; // Process every 3rd frame
384
+
385
+ video.addEventListener('play', async () => {
386
+ while (!video.paused && !video.ended) {
387
+ frameCount++;
388
+ if (frameCount % processEvery === 0) {
389
+ const result = await detector.detectFromVideo(video);
390
+ // Process result
391
+ }
392
+ await new Promise(resolve => requestAnimationFrame(resolve));
393
+ }
394
+ });
395
+ ```
396
+
397
+ ## Browser Support
398
+
399
+ | Browser | Version | Backend |
400
+ |---------|---------|---------|
401
+ | Chrome | 94+ | WASM, WebGPU (113+) |
402
+ | Edge | 94+ | WASM, WebGPU (113+) |
403
+ | Firefox | 95+ | WASM |
404
+ | Safari | 16.4+ | WASM |
405
+
406
+ ## Performance Benchmarks
407
+
408
+ Typical inference times on M1 MacBook Pro:
409
+
410
+ | Configuration | Detection | 3D Pose (per person) | Total (3 people) |
411
+ |--------------|-----------|---------------------|------------------|
412
+ | WASM, 640×640 + 384×288 | 120ms | 45ms | 255ms |
413
+ | WASM, 416×416 + 256×192 | 60ms | 25ms | 135ms |
414
+ | WebGPU, 640×640 + 384×288 | 50ms | 20ms | 110ms |
415
+
416
+ ## Troubleshooting
417
+
418
+ ### "Model loading failed"
419
+
420
+ - Ensure models are accessible via HTTP (not `file://` protocol)
421
+ - Use a local server: `python -m http.server 8080`
422
+ - Check CORS headers
423
+
424
+ ### "Slow inference"
425
+
426
+ - Switch to WebGPU backend if available
427
+ - Reduce input sizes
428
+ - Increase confidence thresholds
429
+ - Process every Nth frame instead of all frames
430
+
431
+ ### "No detections"
432
+
433
+ - Lower `detConfidence` threshold
434
+ - Ensure person is visible and reasonably sized
435
+ - Check image format (RGB, not grayscale)
436
+
437
+ ### "Z-coordinate seems wrong"
438
+
439
+ - Z values are in metric scale (meters)
440
+ - Range is approximately -1.0 to 1.0 meters (a relative depth offset, not the distance from the camera)
441
+ - Z is relative to the person's center
442
+
443
+ ## Custom Models
444
+
445
+ You can use any compatible ONNX models:
446
+
447
+ ```typescript
448
+ const detector = new Pose3DDetector({
449
+ detModel: 'path/to/custom_yolox.onnx',
450
+ poseModel: 'path/to/custom_rtmw3d.onnx',
451
+ detInputSize: [640, 640],
452
+ poseInputSize: [384, 288],
453
+ });
454
+ ```
455
+
456
+ ## License
457
+
458
+ Apache 2.0