rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,622 @@
1
+ /**
2
+ * PoseDetector - Unified API for person detection and pose estimation
3
+ * Combines YOLO12 detector with RTMW pose model in a single optimized interface
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * // Initialize with default models (from HuggingFace)
8
+ * const detector = new PoseDetector();
9
+ * await detector.init();
10
+ *
11
+ * // Or with custom models
12
+ * const detector = new PoseDetector({
13
+ * detModel: 'models/yolov12n.onnx',
14
+ * poseModel: 'models/rtmlib/end2end.onnx',
15
+ * });
16
+ * await detector.init();
17
+ *
18
+ * // From canvas
19
+ * const results = await detector.detectFromCanvas(canvas);
20
+ *
21
+ * // From video element
22
+ * const results = await detector.detectFromVideo(videoElement);
23
+ *
24
+ * // From raw image data
25
+ * const results = await detector.detect(imageData, width, height);
26
+ * ```
27
+ */
28
import * as ort from 'onnxruntime-web';
import { getCachedModel, isModelCached } from '../core/modelCache';
// Configure ONNX Runtime Web
// WASM binaries come from the jsdelivr CDN, pinned to 1.23.0 so the runtime
// JS and the fetched .wasm artifacts cannot drift apart across releases.
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
// Enable SIMD kernels where the browser supports them.
ort.env.wasm.simd = true;
// Run inference on the calling thread instead of a proxy worker.
ort.env.wasm.proxy = false;
34
/**
 * COCO17 keypoint names
 * Index order matches the pose model's keypoint output: postprocessPose looks
 * up keypoint index k here, falling back to `keypoint_${k}` when the model
 * emits more than 17 keypoints.
 */
const KEYPOINT_NAMES = [
    'nose',
    'left_eye',
    'right_eye',
    'left_ear',
    'right_ear',
    'left_shoulder',
    'right_shoulder',
    'left_elbow',
    'right_elbow',
    'left_wrist',
    'right_wrist',
    'left_hip',
    'right_hip',
    'left_knee',
    'right_knee',
    'left_ankle',
    'right_ankle',
];
56
/**
 * Default configuration
 * Any field may be overridden via the PoseDetector constructor config.
 */
const DEFAULT_CONFIG = {
    // Default model weights hosted on HuggingFace; can be local paths/URLs.
    detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
    poseModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/rtmpose/end2end.onnx',
    detInputSize: [416, 416], // Faster detection
    poseInputSize: [384, 288], // Required by model
    detConfidence: 0.5, // Minimum box confidence kept by postprocessYOLO
    nmsThreshold: 0.45, // IoU threshold used by applyNMS
    poseConfidence: 0.3, // Keypoints below this score are marked not visible
    backend: 'webgpu', // Default to WebGPU for better performance
    cache: true, // Route model bytes through the modelCache layer
};
70
export class PoseDetector {
    /**
     * @param {object} [config] - Partial configuration merged over
     *   DEFAULT_CONFIG (model URLs, input sizes, thresholds, backend, cache).
     */
    constructor(config) {
        this.detSession = null;
        this.poseSession = null;
        this.initialized = false;
        // Pre-allocated buffers for maximum performance
        this.canvas = null;
        this.ctx = null;
        this.poseCanvas = null;
        this.poseCtx = null;
        this.poseTensorBuffer = null;
        this.detInputSize = [416, 416];
        this.poseInputSize = [384, 288];
        // Timing/stat info from the most recent detect() call; null until then.
        this.lastStats = null;
        this.config = { ...DEFAULT_CONFIG, ...config };
    }
    /**
     * Fetch a model (optionally through the model cache) and build an ONNX
     * Runtime session for it. Shared by init() for both models.
     * @param {string} url - Model URL or path.
     * @param {{loading: string, cache: string, fetch: string, loaded: string}} labels
     *   - Exact label spellings used in the log / error messages (kept
     *     byte-identical to the historical per-model messages).
     * @returns {Promise<ort.InferenceSession>}
     */
    async _loadModel(url, labels) {
        console.log(`[PoseDetector] Loading ${labels.loading} model from: ${url}`);
        let buffer;
        if (this.config.cache) {
            const cached = await isModelCached(url);
            console.log(`[PoseDetector] ${labels.cache} model cache ${cached ? 'hit' : 'miss'}`);
            buffer = await getCachedModel(url);
        }
        else {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`Failed to fetch ${labels.fetch} model: HTTP ${response.status}`);
            }
            buffer = await response.arrayBuffer();
        }
        const session = await ort.InferenceSession.create(buffer, {
            executionProviders: [this.config.backend],
            graphOptimizationLevel: 'all',
        });
        console.log(`[PoseDetector] ${labels.loaded} model loaded, size: ${(buffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
        return session;
    }
    /**
     * Initialize both detection and pose models with pre-allocated resources.
     * Idempotent: after a successful init, later calls return immediately.
     * @throws {Error} When a model cannot be fetched or a session fails to build.
     */
    async init() {
        if (this.initialized)
            return;
        try {
            this.detSession = await this._loadModel(this.config.detModel, {
                loading: 'detection', cache: 'Det', fetch: 'det', loaded: 'Detection',
            });
            this.poseSession = await this._loadModel(this.config.poseModel, {
                loading: 'pose', cache: 'Pose', fetch: 'pose', loaded: 'Pose',
            });
            // Pre-allocate all resources
            const [detW, detH] = this.config.detInputSize;
            this.detInputSize = [detW, detH];
            const [poseW, poseH] = this.config.poseInputSize;
            this.poseInputSize = [poseW, poseH];
            // Main canvas for detection
            this.canvas = document.createElement('canvas');
            this.canvas.width = detW;
            this.canvas.height = detH;
            this.ctx = this.canvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false,
            });
            // Pose crop canvas (reused for each person). preprocessPose re-checks
            // these dimensions before every crop, because estimatePose reads the
            // configured size as [H, W] while this code reads it as [W, H]; for
            // non-square inputs the two disagree.
            this.poseCanvas = document.createElement('canvas');
            this.poseCanvas.width = poseW;
            this.poseCanvas.height = poseH;
            this.poseCtx = this.poseCanvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false,
            });
            // Pre-allocate pose tensor buffer (CHW float32)
            this.poseTensorBuffer = new Float32Array(3 * poseW * poseH);
            this.initialized = true;
            console.log(`[PoseDetector] ✅ Initialized (det:${detW}x${detH}, pose:${poseW}x${poseH})`);
        }
        catch (error) {
            console.error('[PoseDetector] ❌ Initialization failed:', error);
            throw error;
        }
    }
    /**
     * Draw a CanvasImageSource onto a canvas of the given size and run
     * detect() on the resulting pixels. Shared by the detectFrom* helpers.
     * @param {CanvasImageSource} source - Video/image/bitmap to rasterize.
     * @param {number} width - Target raster width in pixels.
     * @param {number} height - Target raster height in pixels.
     * @param {HTMLCanvasElement} [targetCanvas] - Optional scratch canvas.
     */
    _detectFromSource(source, width, height, targetCanvas) {
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(source, 0, 0, width, height);
        const imageData = ctx.getImageData(0, 0, width, height);
        return this.detect(new Uint8Array(imageData.data.buffer), width, height);
    }
    /**
     * Detect poses from HTMLCanvasElement
     * @param canvas - Canvas element containing the image
     * @returns Array of detected people with keypoints
     */
    async detectFromCanvas(canvas) {
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect poses from HTMLVideoElement
     * @param video - Video element to capture frame from
     * @param targetCanvas - Optional canvas for frame extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     * @throws {Error} When the video has no decoded frame yet (readyState < 2).
     */
    async detectFromVideo(video, targetCanvas) {
        if (video.readyState < 2) {
            throw new Error('Video not ready. Ensure video is loaded and playing.');
        }
        return this._detectFromSource(video, video.videoWidth, video.videoHeight, targetCanvas);
    }
    /**
     * Detect poses from HTMLImageElement
     * @param image - Image element to process
     * @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     * @throws {Error} When the image has not finished loading.
     */
    async detectFromImage(image, targetCanvas) {
        if (!image.complete || !image.naturalWidth) {
            throw new Error('Image not loaded. Ensure image is fully loaded.');
        }
        return this._detectFromSource(image, image.naturalWidth, image.naturalHeight, targetCanvas);
    }
    /**
     * Detect poses from ImageBitmap (efficient for blob/file uploads)
     * @param bitmap - ImageBitmap to process
     * @param targetCanvas - Optional canvas for bitmap extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     */
    async detectFromBitmap(bitmap, targetCanvas) {
        return this._detectFromSource(bitmap, bitmap.width, bitmap.height, targetCanvas);
    }
    /**
     * Detect poses from File (for file input uploads)
     * @param file - File object from input element
     * @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     */
    async detectFromFile(file, targetCanvas) {
        return new Promise((resolve, reject) => {
            const img = new Image();
            const url = URL.createObjectURL(file);
            img.onload = async () => {
                // Fix: release the blob URL once decoded (it was previously leaked).
                URL.revokeObjectURL(url);
                try {
                    resolve(await this.detectFromImage(img, targetCanvas));
                }
                catch (error) {
                    reject(error);
                }
            };
            img.onerror = () => {
                URL.revokeObjectURL(url);
                reject(new Error('Failed to load image from file'));
            };
            img.src = url;
        });
    }
    /**
     * Detect poses from Blob (for camera capture or downloads)
     * @param blob - Blob object to process
     * @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     */
    async detectFromBlob(blob, targetCanvas) {
        const bitmap = await createImageBitmap(blob);
        try {
            return await this.detectFromBitmap(bitmap, targetCanvas);
        }
        finally {
            // Fix: release decoder memory even when detection throws.
            bitmap.close();
        }
    }
    /**
     * Detect people and estimate poses in a single call
     * @param imageData - Image data (Uint8Array RGB/RGBA)
     * @param width - Image width
     * @param height - Image height
     * @returns Array of detected people with keypoints (with a `stats` property attached)
     */
    async detect(imageData, width, height) {
        if (!this.initialized) {
            await this.init();
        }
        const startTime = performance.now();
        // Step 1: Detect people
        const detStart = performance.now();
        const bboxes = await this.detectPeople(imageData, width, height);
        const detTime = performance.now() - detStart;
        // Step 2: Estimate poses for each person.
        // Sequential on purpose: estimatePose reuses this.poseCanvas and
        // this.poseTensorBuffer, so concurrent calls would corrupt each other.
        const poseStart = performance.now();
        const people = [];
        for (const bbox of bboxes) {
            const keypoints = await this.estimatePose(imageData, width, height, bbox);
            people.push({
                bbox: {
                    x1: bbox.x1,
                    y1: bbox.y1,
                    x2: bbox.x2,
                    y2: bbox.y2,
                    confidence: bbox.confidence,
                },
                keypoints,
                scores: keypoints.map(k => k.score),
            });
        }
        const poseTime = performance.now() - poseStart;
        const totalTime = performance.now() - startTime;
        const stats = {
            personCount: people.length,
            detTime: Math.round(detTime),
            poseTime: Math.round(poseTime),
            totalTime: Math.round(totalTime),
        };
        // Remember for getStats(); also attach to the result array for
        // backward compatibility with callers that read `results.stats`.
        this.lastStats = stats;
        people.stats = stats;
        return people;
    }
    /**
     * Get detection and pose statistics from last call
     * @returns The stats object from the most recent detect(), or null if
     *   detect() has not run yet. (Previously always returned null.)
     */
    getStats() {
        return this.lastStats;
    }
    /**
     * Detect people using YOLO12
     * @returns Array of person bounding boxes in original-image coordinates.
     */
    async detectPeople(imageData, width, height) {
        const [inputH, inputW] = this.config.detInputSize;
        // Preprocess (letterbox + normalize)
        const { tensor, paddingX, paddingY, scaleX, scaleY } = this.preprocessYOLO(imageData, width, height, [inputW, inputH]);
        // Inference - resolve the input name dynamically ('images', 'pixel_values', ...)
        const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
        const feeds = { [this.detSession.inputNames[0]]: inputTensor };
        const results = await this.detSession.run(feeds);
        const output = results[this.detSession.outputNames[0]];
        // Postprocess (confidence/class filter, coordinate mapping, NMS)
        return this.postprocessYOLO(output.data, output.dims[1], width, height, paddingX, paddingY, scaleX, scaleY);
    }
    /**
     * Estimate pose for a single person
     * @returns Keypoint array in original-image coordinates.
     */
    async estimatePose(imageData, imgWidth, imgHeight, bbox) {
        const [inputH, inputW] = this.config.poseInputSize;
        // Preprocess (bbox crop, resize, ImageNet normalization)
        const { tensor, center, scale } = this.preprocessPose(imageData, imgWidth, imgHeight, bbox, [inputW, inputH]);
        // Inference - consistency fix: resolve the input name dynamically like
        // detectPeople does, instead of hard-coding 'input'.
        const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
        const feeds = { [this.poseSession.inputNames[0]]: inputTensor };
        const results = await this.poseSession.run(feeds);
        // Postprocess (SimCC argmax decoding)
        return this.postprocessPose(results.simcc_x.data, results.simcc_y.data, results.simcc_x.dims, results.simcc_y.dims, center, scale);
    }
    /**
     * Build a throwaway canvas holding the raw RGBA pixels so drawImage can
     * rescale them. Shared by preprocessYOLO and preprocessPose.
     * @param {Uint8Array} imageData - RGBA bytes (length 4 * w * h).
     */
    _pixelsToCanvas(imageData, imgWidth, imgHeight) {
        const srcCanvas = document.createElement('canvas');
        srcCanvas.width = imgWidth;
        srcCanvas.height = imgHeight;
        const srcCtx = srcCanvas.getContext('2d');
        const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
        srcImageData.data.set(imageData);
        srcCtx.putImageData(srcImageData, 0, 0);
        return srcCanvas;
    }
    /**
     * YOLO preprocessing with letterbox
     * Scales the image into inputSize preserving aspect ratio, pads with
     * black, and emits a CHW float tensor normalized to [0, 1].
     * @returns tensor plus the padding offsets and scale factors needed to
     *   map detections back to original-image coordinates.
     */
    preprocessYOLO(imageData, imgWidth, imgHeight, inputSize) {
        const [inputW, inputH] = inputSize;
        // Reuse (or lazily rebuild after dispose) the detection canvas
        if (!this.canvas || !this.ctx) {
            this.canvas = document.createElement('canvas');
            this.ctx = this.canvas.getContext('2d', { willReadFrequently: true });
        }
        this.canvas.width = inputW;
        this.canvas.height = inputH;
        const ctx = this.ctx;
        // Black letterbox background
        ctx.fillStyle = '#000000';
        ctx.fillRect(0, 0, inputW, inputH);
        // Fit the image inside the input rectangle, centered on the short axis
        const aspectRatio = imgWidth / imgHeight;
        const targetAspectRatio = inputW / inputH;
        let drawWidth, drawHeight, offsetX, offsetY;
        if (aspectRatio > targetAspectRatio) {
            drawWidth = inputW;
            drawHeight = Math.floor(inputW / aspectRatio);
            offsetX = 0;
            offsetY = Math.floor((inputH - drawHeight) / 2);
        }
        else {
            drawHeight = inputH;
            drawWidth = Math.floor(inputH * aspectRatio);
            offsetX = Math.floor((inputW - drawWidth) / 2);
            offsetY = 0;
        }
        const srcCanvas = this._pixelsToCanvas(imageData, imgWidth, imgHeight);
        ctx.drawImage(srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);
        const paddedData = ctx.getImageData(0, 0, inputW, inputH);
        // Normalize to [0, 1] and convert RGBA-interleaved to planar CHW
        const tensor = new Float32Array(inputW * inputH * 3);
        for (let i = 0; i < paddedData.data.length; i += 4) {
            const pixelIdx = i / 4;
            tensor[pixelIdx] = paddedData.data[i] / 255;
            tensor[pixelIdx + inputW * inputH] = paddedData.data[i + 1] / 255;
            tensor[pixelIdx + 2 * inputW * inputH] = paddedData.data[i + 2] / 255;
        }
        const scaleX = imgWidth / drawWidth;
        const scaleY = imgHeight / drawHeight;
        return {
            tensor,
            paddingX: offsetX,
            paddingY: offsetY,
            scaleX,
            scaleY,
        };
    }
    /**
     * YOLO postprocessing with NMS
     * Expects 6 values per detection [x1, y1, x2, y2, confidence, classId];
     * keeps person-class (0) boxes above detConfidence, maps them back to
     * original-image coordinates, then applies NMS.
     */
    postprocessYOLO(output, numDetections, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY) {
        const detections = [];
        for (let i = 0; i < numDetections; i++) {
            const idx = i * 6;
            const x1 = output[idx];
            const y1 = output[idx + 1];
            const x2 = output[idx + 2];
            const y2 = output[idx + 3];
            const confidence = output[idx + 4];
            const classId = Math.round(output[idx + 5]);
            if (confidence < this.config.detConfidence || classId !== 0)
                continue;
            // Undo the letterbox: remove padding, then rescale
            const tx1 = (x1 - paddingX) * scaleX;
            const ty1 = (y1 - paddingY) * scaleY;
            const tx2 = (x2 - paddingX) * scaleX;
            const ty2 = (y2 - paddingY) * scaleY;
            detections.push({
                x1: Math.max(0, tx1),
                y1: Math.max(0, ty1),
                x2: Math.min(imgWidth, tx2),
                y2: Math.min(imgHeight, ty2),
                confidence,
            });
        }
        return this.applyNMS(detections, this.config.nmsThreshold);
    }
    /**
     * Pose preprocessing with affine crop
     * Crops an expanded (1.25x) box around the person, resizes it to the
     * model input, and applies ImageNet mean/std normalization into the
     * pre-allocated CHW buffer.
     */
    preprocessPose(imageData, imgWidth, imgHeight, bbox, inputSize) {
        const [inputW, inputH] = inputSize;
        const bboxWidth = bbox.x2 - bbox.x1;
        const bboxHeight = bbox.y2 - bbox.y1;
        const center = [
            bbox.x1 + bboxWidth / 2,
            bbox.y1 + bboxHeight / 2,
        ];
        // Expand the box to the model aspect ratio with 1.25x padding
        const bboxAspectRatio = bboxWidth / bboxHeight;
        const modelAspectRatio = inputW / inputH;
        let scaleW, scaleH;
        if (bboxAspectRatio > modelAspectRatio) {
            scaleW = bboxWidth * 1.25;
            scaleH = scaleW / modelAspectRatio;
        }
        else {
            scaleH = bboxHeight * 1.25;
            scaleW = scaleH * modelAspectRatio;
        }
        const scale = [scaleW, scaleH];
        // Reuse the pre-allocated pose canvas (rebuild lazily after dispose)
        if (!this.poseCanvas || !this.poseCtx) {
            this.poseCanvas = document.createElement('canvas');
            this.poseCanvas.width = inputW;
            this.poseCanvas.height = inputH;
            this.poseCtx = this.poseCanvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false,
            });
            this.poseTensorBuffer = new Float32Array(3 * inputW * inputH);
        }
        // Fix: init() sizes this canvas as [W, H] from config.poseInputSize
        // while estimatePose destructures the same array as [H, W]. For
        // non-square inputs the dimensions were swapped and getImageData read
        // out-of-bounds (transparent) rows. Resize to the requested crop size.
        if (this.poseCanvas.width !== inputW || this.poseCanvas.height !== inputH) {
            this.poseCanvas.width = inputW;
            this.poseCanvas.height = inputH;
        }
        if (!this.poseTensorBuffer || this.poseTensorBuffer.length !== 3 * inputW * inputH) {
            this.poseTensorBuffer = new Float32Array(3 * inputW * inputH);
        }
        const ctx = this.poseCtx;
        ctx.clearRect(0, 0, inputW, inputH);
        const srcCanvas = this._pixelsToCanvas(imageData, imgWidth, imgHeight);
        // Crop the expanded box and scale it to the model input size
        const srcX = center[0] - scaleW / 2;
        const srcY = center[1] - scaleH / 2;
        ctx.drawImage(srcCanvas, srcX, srcY, scaleW, scaleH, 0, 0, inputW, inputH);
        const croppedData = ctx.getImageData(0, 0, inputW, inputH);
        // ImageNet normalization with precomputed constants
        const tensor = this.poseTensorBuffer;
        const len = croppedData.data.length;
        const planeSize = inputW * inputH;
        const mean0 = 123.675, mean1 = 116.28, mean2 = 103.53;
        const stdInv0 = 1 / 58.395, stdInv1 = 1 / 57.12, stdInv2 = 1 / 57.375;
        // Loop unrolled 4 pixels at a time (pixel count is a multiple of 4 for
        // the supported input sizes)
        for (let i = 0; i < len; i += 16) {
            const p1 = i / 4, p2 = p1 + 1, p3 = p1 + 2, p4 = p1 + 3;
            // R channel
            tensor[p1] = (croppedData.data[i] - mean0) * stdInv0;
            tensor[p2] = (croppedData.data[i + 4] - mean0) * stdInv0;
            tensor[p3] = (croppedData.data[i + 8] - mean0) * stdInv0;
            tensor[p4] = (croppedData.data[i + 12] - mean0) * stdInv0;
            // G channel
            tensor[p1 + planeSize] = (croppedData.data[i + 1] - mean1) * stdInv1;
            tensor[p2 + planeSize] = (croppedData.data[i + 5] - mean1) * stdInv1;
            tensor[p3 + planeSize] = (croppedData.data[i + 9] - mean1) * stdInv1;
            tensor[p4 + planeSize] = (croppedData.data[i + 13] - mean1) * stdInv1;
            // B channel
            tensor[p1 + planeSize * 2] = (croppedData.data[i + 2] - mean2) * stdInv2;
            tensor[p2 + planeSize * 2] = (croppedData.data[i + 6] - mean2) * stdInv2;
            tensor[p3 + planeSize * 2] = (croppedData.data[i + 10] - mean2) * stdInv2;
            tensor[p4 + planeSize * 2] = (croppedData.data[i + 14] - mean2) * stdInv2;
        }
        return { tensor, center, scale };
    }
    /**
     * Pose postprocessing with SimCC decoding
     * Takes per-keypoint 1D classification logits for x and y, argmaxes each
     * axis, and maps the normalized position back into the crop box.
     */
    postprocessPose(simccX, simccY, shapeX, shapeY, center, scale) {
        const numKeypoints = shapeX[1];
        const wx = shapeX[2];
        const wy = shapeY[2];
        const keypoints = [];
        for (let k = 0; k < numKeypoints; k++) {
            // Argmax over the x-axis logits
            let maxX = -Infinity;
            let argmaxX = 0;
            for (let i = 0; i < wx; i++) {
                const val = simccX[k * wx + i];
                if (val > maxX) {
                    maxX = val;
                    argmaxX = i;
                }
            }
            // Argmax over the y-axis logits
            let maxY = -Infinity;
            let argmaxY = 0;
            for (let i = 0; i < wy; i++) {
                const val = simccY[k * wy + i];
                if (val > maxY) {
                    maxY = val;
                    argmaxY = i;
                }
            }
            // Keypoint score = mean of the two axis maxima
            const score = 0.5 * (maxX + maxY);
            const visible = score > this.config.poseConfidence;
            // Map normalized [0,1) crop coordinates back to the original image
            const normX = argmaxX / wx;
            const normY = argmaxY / wy;
            const x = (normX - 0.5) * scale[0] + center[0];
            const y = (normY - 0.5) * scale[1] + center[1];
            keypoints.push({
                x,
                y,
                score,
                visible,
                name: KEYPOINT_NAMES[k] || `keypoint_${k}`,
            });
        }
        return keypoints;
    }
    /**
     * Non-Maximum Suppression
     * Greedy: keep the highest-confidence box, drop any remaining box whose
     * IoU with it exceeds iouThreshold, repeat. Sorts the input in place.
     */
    applyNMS(detections, iouThreshold) {
        if (detections.length === 0)
            return [];
        detections.sort((a, b) => b.confidence - a.confidence);
        const selected = [];
        const used = new Set();
        for (let i = 0; i < detections.length; i++) {
            if (used.has(i))
                continue;
            selected.push(detections[i]);
            used.add(i);
            for (let j = i + 1; j < detections.length; j++) {
                if (used.has(j))
                    continue;
                const iou = this.calculateIoU(detections[i], detections[j]);
                if (iou > iouThreshold) {
                    used.add(j);
                }
            }
        }
        return selected;
    }
    /**
     * Calculate IoU between two boxes
     * @returns Intersection-over-union in [0, 1]; 0 when boxes don't overlap.
     */
    calculateIoU(box1, box2) {
        const x1 = Math.max(box1.x1, box2.x1);
        const y1 = Math.max(box1.y1, box2.y1);
        const x2 = Math.min(box1.x2, box2.x2);
        const y2 = Math.min(box1.y2, box2.y2);
        if (x2 <= x1 || y2 <= y1)
            return 0;
        const intersection = (x2 - x1) * (y2 - y1);
        const area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
        const area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
        const union = area1 + area2 - intersection;
        return intersection / union;
    }
    /**
     * Dispose resources
     * Releases both ONNX sessions and drops the pre-allocated canvases and
     * buffers so they can be garbage collected. The detector can be
     * re-initialized afterwards via init()/detect().
     */
    dispose() {
        if (this.detSession) {
            this.detSession.release();
            this.detSession = null;
        }
        if (this.poseSession) {
            this.poseSession.release();
            this.poseSession = null;
        }
        this.canvas = null;
        this.ctx = null;
        this.poseCanvas = null;
        this.poseCtx = null;
        this.poseTensorBuffer = null;
        this.initialized = false;
    }
}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * PoseTracker - tracks poses across frames with cached detections
3
+ * Reduces detection frequency for better performance
4
+ */
5
+ import { Wholebody } from './wholebody';
6
+ export declare class PoseTracker {
7
+ private wholebody;
8
+ private detFrequency;
9
+ private cachedBoxes;
10
+ private frameCount;
11
+ private nextId;
12
+ constructor(WholebodyClass: typeof Wholebody, detFrequency?: number, toOpenpose?: boolean, mode?: 'performance' | 'lightweight' | 'balanced', backend?: 'onnxruntime', device?: string);
13
+ init(): Promise<void>;
14
+ call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<{
15
+ keypoints: number[][];
16
+ scores: number[];
17
+ }>;
18
+ private updateCachedBoxes;
19
+ private calculateIoU;
20
+ private cleanupCachedBoxes;
21
+ }
22
+ //# sourceMappingURL=poseTracker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"poseTracker.d.ts","sourceRoot":"","sources":["../../src/solution/poseTracker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,qBAAa,WAAW;IACtB,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,MAAM,CAAa;gBAGzB,cAAc,EAAE,OAAO,SAAS,EAChC,YAAY,GAAE,MAAU,EACxB,UAAU,GAAE,OAAe,EAC3B,IAAI,GAAE,aAAa,GAAG,aAAa,GAAG,UAAuB,EAC7D,OAAO,GAAE,aAA6B,EACtC,MAAM,GAAE,MAAc;IAelB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAIrB,IAAI,CACR,KAAK,EAAE,UAAU,EACjB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAmBvD,OAAO,CAAC,iBAAiB;IA6EzB,OAAO,CAAC,YAAY;IAiBpB,OAAO,CAAC,kBAAkB;CAO3B"}