rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
/**
 * AnimalDetector - Animal detection and pose estimation API
 * Supports 30 animal classes with ViTPose++ pose model
 *
 * @example
 * ```typescript
 * // Initialize with default models
 * const detector = new AnimalDetector();
 * await detector.init();
 *
 * // Detect animals
 * const animals = await detector.detectFromCanvas(canvas);
 * console.log(`Found ${animals.length} animals`);
 *
 * // With custom models
 * const detector2 = new AnimalDetector({
 *   detModel: 'path/to/yolox_animal.onnx',
 *   poseModel: 'path/to/vitpose_animal.onnx',
 * });
 * ```
 */
import * as ort from 'onnxruntime-web';
import { getCachedModel, isModelCached } from '../core/modelCache';
// Configure ONNX Runtime Web.
// NOTE(review): importing this module has the side effect of pointing the
// WASM loader at a pinned CDN build (1.23.0) and enabling SIMD while
// disabling the worker proxy — confirm the pinned version matches the
// installed onnxruntime-web package, and that mutating shared ort.env
// state here does not conflict with other modules in this package that
// also configure it.
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
ort.env.wasm.simd = true;
ort.env.wasm.proxy = false;
+ /**
29
+ * 30 Animal class names supported by AnimalDetector
30
+ */
31
+ export const ANIMAL_CLASSES = [
32
+ 'gorilla',
33
+ 'spider-monkey',
34
+ 'howling-monkey',
35
+ 'zebra',
36
+ 'elephant',
37
+ 'hippo',
38
+ 'raccon',
39
+ 'rhino',
40
+ 'giraffe',
41
+ 'tiger',
42
+ 'deer',
43
+ 'lion',
44
+ 'panda',
45
+ 'cheetah',
46
+ 'black-bear',
47
+ 'polar-bear',
48
+ 'antelope',
49
+ 'fox',
50
+ 'buffalo',
51
+ 'cow',
52
+ 'wolf',
53
+ 'dog',
54
+ 'sheep',
55
+ 'cat',
56
+ 'horse',
57
+ 'rabbit',
58
+ 'pig',
59
+ 'chimpanzee',
60
+ 'monkey',
61
+ 'orangutan',
62
+ ];
/**
 * Available ViTPose++ models for animal pose estimation.
 * All models are trained on 6 datasets and support 30 animal classes.
 * Keys are the values accepted by config.poseModelType; the constructor
 * resolves `url` and `inputSize` from the selected entry.
 * AP figures are on the AP10K benchmark.
 */
export const VITPOSE_MODELS = {
    /** ViTPose++-s: Fastest, 74.2 AP on AP10K */
    'vitpose-s': {
        name: 'ViTPose++-s',
        url: 'https://huggingface.co/JunkyByte/easy_ViTPose/resolve/main/onnx/apt36k/vitpose-s-apt36k.onnx',
        inputSize: [256, 192], // [height, width] consumed as poseInputSize
        ap: 74.2,
        description: 'Fastest inference, suitable for real-time applications',
    },
    /** ViTPose++-b: Balanced, 75.9 AP on AP10K */
    'vitpose-b': {
        name: 'ViTPose++-b',
        url: 'https://huggingface.co/JunkyByte/easy_ViTPose/resolve/main/onnx/apt36k/vitpose-b-apt36k.onnx',
        inputSize: [256, 192],
        ap: 75.9,
        description: 'Balanced speed and accuracy',
    },
    /** ViTPose++-l: Most accurate, 80.8 AP on AP10K */
    'vitpose-l': {
        name: 'ViTPose++-l',
        // NOTE(review): this URL points at a `vitpose-h-apt36k.onnx` (huge)
        // checkpoint while the key/name say "l" (large) — confirm whether
        // the URL or the label is intended.
        url: 'https://huggingface.co/JunkyByte/easy_ViTPose/resolve/main/onnx/apt36k/vitpose-h-apt36k.onnx',
        inputSize: [256, 192],
        ap: 80.8,
        description: 'Highest accuracy, slower inference',
    },
};
/**
 * COCO-17 keypoint names, indexed by keypoint id.
 * Reused for animal skeletons by postprocessPose(), which falls back to
 * `keypoint_<k>` for any index beyond this list.
 */
const KEYPOINT_NAMES = [
    'nose',
    'left_eye', 'right_eye',
    'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder',
    'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist',
    'left_hip', 'right_hip',
    'left_knee', 'right_knee',
    'left_ankle', 'right_ankle',
];
/**
 * Default configuration - uses ViTPose++-b model.
 * Merged under user-supplied options in the constructor
 * (`{ ...DEFAULT_CONFIG, ...config }`), so any field a caller sets wins.
 */
const DEFAULT_CONFIG = {
    detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
    poseModel: undefined, // Will be set from poseModelType
    poseModelType: 'vitpose-b',
    detInputSize: [640, 640],
    poseInputSize: [256, 192], // overwritten from VITPOSE_MODELS when poseModelType is used
    detConfidence: 0.5, // minimum detection confidence kept by postprocessYOLO
    nmsThreshold: 0.45, // IoU above which applyNMS suppresses a box
    poseConfidence: 0.3, // per-keypoint score threshold for `visible`
    backend: 'webgpu', // Default to WebGPU for better performance
    cache: true, // route model downloads through modelCache when true
    classes: null, // null = detect every class; otherwise a name allow-list
};
/**
 * Two-stage animal detector: a YOLO ONNX model finds animal boxes, then a
 * pose ONNX model estimates keypoints per box. Browser-only: relies on
 * `document.createElement('canvas')` and 2D contexts for preprocessing.
 */
export class AnimalDetector {
    /**
     * @param config Partial overrides of DEFAULT_CONFIG. When `poseModel`
     *   is not given, it is resolved from `poseModelType` (falling back to
     *   'vitpose-b').
     */
    constructor(config = {}) {
        this.detSession = null;
        this.poseSession = null;
        this.initialized = false;
        this.classFilter = null; // Set of allowed class ids, or null = all
        // Pre-allocated buffers
        this.canvas = null;
        this.ctx = null;
        this.poseCanvas = null;
        this.poseCtx = null;
        this.poseTensorBuffer = null;
        this.detInputSize = [640, 640];
        this.poseInputSize = [256, 192];
        // Resolve pose model URL from poseModelType if poseModel not explicitly provided
        let finalConfig = { ...DEFAULT_CONFIG, ...config };
        if (!config.poseModel && config.poseModelType) {
            // NOTE(review): an unknown poseModelType yields `undefined` here
            // and this will throw on `.url` — no validation is performed.
            const vitposeModel = VITPOSE_MODELS[config.poseModelType];
            finalConfig.poseModel = vitposeModel.url;
            finalConfig.poseInputSize = vitposeModel.inputSize;
        }
        else if (!config.poseModel && !config.poseModelType) {
            // Use default vitpose-b
            finalConfig.poseModel = VITPOSE_MODELS['vitpose-b'].url;
            finalConfig.poseInputSize = VITPOSE_MODELS['vitpose-b'].inputSize;
        }
        this.config = finalConfig;
        this.updateClassFilter();
    }
    /**
     * Update class filter based on config.
     * Rebuilds `classFilter` from `config.classes`; unknown names are
     * warned about and skipped, not treated as errors.
     */
    updateClassFilter() {
        if (!this.config.classes) {
            this.classFilter = null;
            return;
        }
        this.classFilter = new Set();
        this.config.classes.forEach((className) => {
            // Case-insensitive match against the canonical class list.
            const classId = ANIMAL_CLASSES.indexOf(className.toLowerCase());
            if (classId !== -1) {
                this.classFilter.add(classId);
            }
            else {
                console.warn(`[AnimalDetector] Unknown class: ${className}`);
            }
        });
    }
    /**
     * Set which animal classes to detect.
     * Pass null/undefined to detect all classes again.
     */
    setClasses(classes) {
        this.config.classes = classes;
        this.updateClassFilter();
    }
    /**
     * Get list of available animal classes (defensive copy).
     */
    getAvailableClasses() {
        return [...ANIMAL_CLASSES];
    }
    /**
     * Get information about the current ViTPose++ model, or null when a
     * custom poseModel URL was supplied without a known poseModelType.
     */
    getPoseModelInfo() {
        const modelType = this.config.poseModelType;
        if (modelType && VITPOSE_MODELS[modelType]) {
            return VITPOSE_MODELS[modelType];
        }
        return null;
    }
    /**
     * Initialize both detection and pose models.
     * Idempotent once it has succeeded; a failure leaves `initialized`
     * false so a later call retries.
     * NOTE(review): two concurrent init() calls will both run the load
     * path — callers should await a single init() before detecting.
     */
    async init() {
        if (this.initialized)
            return;
        try {
            // Load detection model
            console.log(`[AnimalDetector] Loading detection model from: ${this.config.detModel}`);
            let detBuffer;
            if (this.config.cache) {
                // isModelCached is only consulted for the log line; the fetch
                // itself goes through getCachedModel either way.
                const detCached = await isModelCached(this.config.detModel);
                console.log(`[AnimalDetector] Det model cache ${detCached ? 'hit' : 'miss'}`);
                detBuffer = await getCachedModel(this.config.detModel);
            }
            else {
                const detResponse = await fetch(this.config.detModel);
                if (!detResponse.ok) {
                    throw new Error(`Failed to fetch det model: HTTP ${detResponse.status}`);
                }
                detBuffer = await detResponse.arrayBuffer();
            }
            this.detSession = await ort.InferenceSession.create(detBuffer, {
                executionProviders: [this.config.backend],
                graphOptimizationLevel: 'all',
            });
            console.log(`[AnimalDetector] Detection model loaded, size: ${(detBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
            // Load pose model
            console.log(`[AnimalDetector] Loading pose model from: ${this.config.poseModel}`);
            let poseBuffer;
            if (this.config.cache) {
                const poseCached = await isModelCached(this.config.poseModel);
                console.log(`[AnimalDetector] Pose model cache ${poseCached ? 'hit' : 'miss'}`);
                poseBuffer = await getCachedModel(this.config.poseModel);
            }
            else {
                const poseResponse = await fetch(this.config.poseModel);
                if (!poseResponse.ok) {
                    throw new Error(`Failed to fetch pose model: HTTP ${poseResponse.status}`);
                }
                poseBuffer = await poseResponse.arrayBuffer();
            }
            this.poseSession = await ort.InferenceSession.create(poseBuffer, {
                executionProviders: [this.config.backend],
                graphOptimizationLevel: 'all',
            });
            console.log(`[AnimalDetector] Pose model loaded, size: ${(poseBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
            // Pre-allocate resources
            const [detW, detH] = this.config.detInputSize;
            this.detInputSize = [detW, detH];
            const [poseW, poseH] = this.config.poseInputSize;
            this.poseInputSize = [poseW, poseH];
            // Main canvas for detection
            this.canvas = document.createElement('canvas');
            this.canvas.width = detW;
            this.canvas.height = detH;
            this.ctx = this.canvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false
            });
            // Pose crop canvas
            this.poseCanvas = document.createElement('canvas');
            this.poseCanvas.width = poseW;
            this.poseCanvas.height = poseH;
            this.poseCtx = this.poseCanvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false
            });
            // Pre-allocate pose tensor buffer
            this.poseTensorBuffer = new Float32Array(3 * poseW * poseH);
            this.initialized = true;
            console.log(`[AnimalDetector] ✅ Initialized (det:${detW}x${detH}, pose:${poseW}x${poseH})`);
        }
        catch (error) {
            console.error('[AnimalDetector] ❌ Initialization failed:', error);
            throw error;
        }
    }
    /**
     * Detect animals from HTMLCanvasElement.
     * NOTE(review): requires a 2D-context canvas — getImageData on a
     * WebGL-backed canvas will fail at the getContext('2d') step.
     */
    async detectFromCanvas(canvas) {
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        // RGBA bytes are handed to detect() without copying the pixel data.
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect animals from HTMLVideoElement.
     * @param targetCanvas Optional scratch canvas to reuse between frames
     *   (avoids allocating one per call). Its size is overwritten.
     */
    async detectFromVideo(video, targetCanvas) {
        if (video.readyState < 2) {
            throw new Error('Video not ready. Ensure video is loaded and playing.');
        }
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = video.videoWidth;
        canvas.height = video.videoHeight;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect animals from HTMLImageElement (must be fully loaded).
     */
    async detectFromImage(image, targetCanvas) {
        if (!image.complete || !image.naturalWidth) {
            throw new Error('Image not loaded. Ensure image is fully loaded.');
        }
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = image.naturalWidth;
        canvas.height = image.naturalHeight;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(image, 0, 0);
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect animals from ImageBitmap. The bitmap is not closed here;
     * the caller (see detectFromBlob) owns its lifetime.
     */
    async detectFromBitmap(bitmap, targetCanvas) {
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = bitmap.width;
        canvas.height = bitmap.height;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(bitmap, 0, 0);
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect animals from File.
     * NOTE(review): the object URL created for the <img> src is never
     * revoked (URL.revokeObjectURL), so each call leaks one URL.
     */
    async detectFromFile(file, targetCanvas) {
        return new Promise((resolve, reject) => {
            const img = new Image();
            img.onload = async () => {
                try {
                    const results = await this.detectFromImage(img, targetCanvas);
                    resolve(results);
                }
                catch (error) {
                    reject(error);
                }
            };
            img.onerror = () => reject(new Error('Failed to load image from file'));
            img.src = URL.createObjectURL(file);
        });
    }
    /**
     * Detect animals from Blob. Closes the intermediate ImageBitmap.
     */
    async detectFromBlob(blob, targetCanvas) {
        const bitmap = await createImageBitmap(blob);
        const results = await this.detectFromBitmap(bitmap, targetCanvas);
        bitmap.close();
        return results;
    }
    /**
     * Detect animals from raw image data.
     * @param imageData RGBA bytes, row-major, 4 bytes per pixel.
     * @param width Image width in pixels.
     * @param height Image height in pixels.
     * @returns Array of animal results; timing/count stats are attached as
     *   a non-index `stats` property on the returned array.
     */
    async detect(imageData, width, height) {
        // Lazy init so callers may skip an explicit init() call.
        if (!this.initialized) {
            await this.init();
        }
        const startTime = performance.now();
        // Step 1: Detect animals
        const detStart = performance.now();
        const detections = await this.detectAnimals(imageData, width, height);
        const detTime = performance.now() - detStart;
        // Step 2: Estimate poses for each animal (sequentially, one inference
        // per detection).
        const poseStart = performance.now();
        const animals = [];
        for (const det of detections) {
            const keypoints = await this.estimatePose(imageData, width, height, det.bbox);
            animals.push({
                bbox: det.bbox,
                classId: det.classId,
                className: det.className,
                keypoints,
                scores: keypoints.map(k => k.score),
            });
        }
        const poseTime = performance.now() - poseStart;
        const totalTime = performance.now() - startTime;
        // Calculate stats
        const classCounts = {};
        animals.forEach(animal => {
            classCounts[animal.className] = (classCounts[animal.className] || 0) + 1;
        });
        // Attach stats as an expando property on the array (does not affect
        // length or iteration, but is lost by spread/slice copies).
        animals.stats = {
            animalCount: animals.length,
            classCounts,
            detTime: Math.round(detTime),
            poseTime: Math.round(poseTime),
            totalTime: Math.round(totalTime),
        };
        return animals;
    }
    /**
     * Detect animals using YOLO: letterbox-preprocess, run the detection
     * session, decode and NMS-filter the output.
     */
    async detectAnimals(imageData, width, height) {
        // NOTE(review): destructured as [inputH, inputW] while DEFAULT_CONFIG
        // documents detInputSize as [640, 640]; order only matters for
        // non-square sizes — confirm the intended [h, w] vs [w, h] convention.
        const [inputH, inputW] = this.config.detInputSize;
        const { tensor, paddingX, paddingY, scaleX, scaleY } = this.preprocessYOLO(imageData, width, height, [inputW, inputH]);
        const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
        const inputName = this.detSession.inputNames[0];
        const feeds = {};
        feeds[inputName] = inputTensor;
        const results = await this.detSession.run(feeds);
        const output = results[this.detSession.outputNames[0]];
        // output.dims[1] is taken as the detection count — assumes an
        // end-to-end model emitting [1, N, 6] boxes.
        return this.postprocessYOLO(output.data, output.dims[1], width, height, paddingX, paddingY, scaleX, scaleY);
    }
    /**
     * Estimate pose for a single animal: crop around the bbox, run the
     * pose session, decode SimCC outputs back to image coordinates.
     * NOTE(review): hard-codes input name 'input' and output names
     * 'simcc_x'/'simcc_y' (RTMPose-style SimCC heads) — verify the
     * configured ViTPose ONNX exports expose these exact names.
     */
    async estimatePose(imageData, imgWidth, imgHeight, bbox) {
        const [inputH, inputW] = this.config.poseInputSize;
        const { tensor, center, scale } = this.preprocessPose(imageData, imgWidth, imgHeight, bbox, [inputW, inputH]);
        const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
        const results = await this.poseSession.run({ input: inputTensor });
        return this.postprocessPose(results.simcc_x.data, results.simcc_y.data, results.simcc_x.dims, results.simcc_y.dims, center, scale);
    }
    /**
     * Letterbox the image into the YOLO input size (black padding,
     * aspect-preserving), then build a CHW float tensor scaled to [0, 1].
     * @returns tensor plus the padding offsets and per-axis scale factors
     *   needed to map detections back to original image coordinates.
     */
    preprocessYOLO(imageData, imgWidth, imgHeight, inputSize) {
        const [inputW, inputH] = inputSize;
        // Fallback allocation in case detect() ran before init() finished
        // canvas setup.
        if (!this.canvas || !this.ctx) {
            this.canvas = document.createElement('canvas');
            this.canvas.width = inputW;
            this.canvas.height = inputH;
            this.ctx = this.canvas.getContext('2d', { willReadFrequently: true, alpha: false });
        }
        const ctx = this.ctx;
        ctx.fillStyle = '#000000';
        ctx.fillRect(0, 0, inputW, inputH);
        const aspectRatio = imgWidth / imgHeight;
        const targetAspectRatio = inputW / inputH;
        let drawWidth, drawHeight, offsetX, offsetY;
        if (aspectRatio > targetAspectRatio) {
            // Wider than target: fit width, pad top/bottom.
            drawWidth = inputW;
            drawHeight = Math.floor(inputW / aspectRatio);
            offsetX = 0;
            offsetY = Math.floor((inputH - drawHeight) / 2);
        }
        else {
            // Taller than target: fit height, pad left/right.
            drawHeight = inputH;
            drawWidth = Math.floor(inputH * aspectRatio);
            offsetX = Math.floor((inputW - drawWidth) / 2);
            offsetY = 0;
        }
        // Round-trip the raw RGBA bytes through a scratch canvas so drawImage
        // can do the scaled blit. Allocated per call.
        const srcCanvas = document.createElement('canvas');
        const srcCtx = srcCanvas.getContext('2d');
        srcCanvas.width = imgWidth;
        srcCanvas.height = imgHeight;
        const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
        srcImageData.data.set(imageData);
        srcCtx.putImageData(srcImageData, 0, 0);
        ctx.drawImage(srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);
        const paddedData = ctx.getImageData(0, 0, inputW, inputH);
        // RGBA interleaved -> planar CHW, normalized to [0, 1].
        const tensor = new Float32Array(inputW * inputH * 3);
        for (let i = 0; i < paddedData.data.length; i += 4) {
            const pixelIdx = i / 4;
            tensor[pixelIdx] = paddedData.data[i] / 255;
            tensor[pixelIdx + inputW * inputH] = paddedData.data[i + 1] / 255;
            tensor[pixelIdx + 2 * inputW * inputH] = paddedData.data[i + 2] / 255;
        }
        const scaleX = imgWidth / drawWidth;
        const scaleY = imgHeight / drawHeight;
        return { tensor, paddingX: offsetX, paddingY: offsetY, scaleX, scaleY };
    }
    /**
     * Decode [x1, y1, x2, y2, confidence, classId] rows from the flat
     * detector output, filter by confidence and class allow-list, undo the
     * letterbox transform, clamp to image bounds, and NMS the survivors.
     */
    postprocessYOLO(output, numDetections, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY) {
        const detections = [];
        for (let i = 0; i < numDetections; i++) {
            const idx = i * 6; // 6 floats per detection row
            const x1 = output[idx];
            const y1 = output[idx + 1];
            const x2 = output[idx + 2];
            const y2 = output[idx + 3];
            const confidence = output[idx + 4];
            const classId = Math.round(output[idx + 5]);
            if (confidence < this.config.detConfidence)
                continue;
            if (this.classFilter && !this.classFilter.has(classId))
                continue;
            // Map letterboxed coordinates back into the original image frame.
            const tx1 = (x1 - paddingX) * scaleX;
            const ty1 = (y1 - paddingY) * scaleY;
            const tx2 = (x2 - paddingX) * scaleX;
            const ty2 = (y2 - paddingY) * scaleY;
            detections.push({
                bbox: {
                    x1: Math.max(0, tx1),
                    y1: Math.max(0, ty1),
                    x2: Math.min(imgWidth, tx2),
                    y2: Math.min(imgHeight, ty2),
                    confidence,
                },
                classId,
                className: ANIMAL_CLASSES[classId] || `animal_${classId}`,
            });
        }
        return this.applyNMS(detections, this.config.nmsThreshold);
    }
    /**
     * Crop a padded (1.25x) aspect-matched window around the bbox, resize
     * it to the pose input size, and normalize to a CHW float tensor with
     * ImageNet mean/std (the constants below are the standard ImageNet
     * values in 0-255 scale).
     * @returns tensor plus the crop center/scale needed by postprocessPose
     *   to map keypoints back to image coordinates.
     */
    preprocessPose(imageData, imgWidth, imgHeight, bbox, inputSize) {
        const [inputW, inputH] = inputSize;
        const bboxWidth = bbox.x2 - bbox.x1;
        const bboxHeight = bbox.y2 - bbox.y1;
        const center = [
            bbox.x1 + bboxWidth / 2,
            bbox.y1 + bboxHeight / 2,
        ];
        // Expand the crop to the model aspect ratio with 25% margin.
        const bboxAspectRatio = bboxWidth / bboxHeight;
        const modelAspectRatio = inputW / inputH;
        let scaleW, scaleH;
        if (bboxAspectRatio > modelAspectRatio) {
            scaleW = bboxWidth * 1.25;
            scaleH = scaleW / modelAspectRatio;
        }
        else {
            scaleH = bboxHeight * 1.25;
            scaleW = scaleH * modelAspectRatio;
        }
        const scale = [scaleW, scaleH];
        // Fallback allocation mirroring init(); NOTE(review): if inputSize
        // ever differs from what init() allocated, the existing canvas and
        // poseTensorBuffer are reused at the old size — sizes are assumed
        // constant per instance.
        if (!this.poseCanvas || !this.poseCtx) {
            this.poseCanvas = document.createElement('canvas');
            this.poseCanvas.width = inputW;
            this.poseCanvas.height = inputH;
            this.poseCtx = this.poseCanvas.getContext('2d', { willReadFrequently: true, alpha: false });
            this.poseTensorBuffer = new Float32Array(3 * inputW * inputH);
        }
        const ctx = this.poseCtx;
        ctx.clearRect(0, 0, inputW, inputH);
        const srcCanvas = document.createElement('canvas');
        const srcCtx = srcCanvas.getContext('2d');
        srcCanvas.width = imgWidth;
        srcCanvas.height = imgHeight;
        const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
        srcImageData.data.set(imageData);
        srcCtx.putImageData(srcImageData, 0, 0);
        const srcX = center[0] - scaleW / 2;
        const srcY = center[1] - scaleH / 2;
        ctx.drawImage(srcCanvas, srcX, srcY, scaleW, scaleH, 0, 0, inputW, inputH);
        const croppedData = ctx.getImageData(0, 0, inputW, inputH);
        const tensor = this.poseTensorBuffer;
        const len = croppedData.data.length;
        const planeSize = inputW * inputH;
        const mean0 = 123.675, mean1 = 116.28, mean2 = 103.53;
        const stdInv0 = 1 / 58.395, stdInv1 = 1 / 57.12, stdInv2 = 1 / 57.375;
        // Manually unrolled x4: each iteration consumes 16 RGBA bytes
        // (4 pixels). Assumes the pixel count is divisible by 4, which holds
        // for the default 192x256 input.
        for (let i = 0; i < len; i += 16) {
            const p1 = i / 4, p2 = p1 + 1, p3 = p1 + 2, p4 = p1 + 3;
            tensor[p1] = (croppedData.data[i] - mean0) * stdInv0;
            tensor[p2] = (croppedData.data[i + 4] - mean0) * stdInv0;
            tensor[p3] = (croppedData.data[i + 8] - mean0) * stdInv0;
            tensor[p4] = (croppedData.data[i + 12] - mean0) * stdInv0;
            tensor[p1 + planeSize] = (croppedData.data[i + 1] - mean1) * stdInv1;
            tensor[p2 + planeSize] = (croppedData.data[i + 5] - mean1) * stdInv1;
            tensor[p3 + planeSize] = (croppedData.data[i + 9] - mean1) * stdInv1;
            tensor[p4 + planeSize] = (croppedData.data[i + 13] - mean1) * stdInv1;
            tensor[p1 + planeSize * 2] = (croppedData.data[i + 2] - mean2) * stdInv2;
            tensor[p2 + planeSize * 2] = (croppedData.data[i + 6] - mean2) * stdInv2;
            tensor[p3 + planeSize * 2] = (croppedData.data[i + 10] - mean2) * stdInv2;
            tensor[p4 + planeSize * 2] = (croppedData.data[i + 14] - mean2) * stdInv2;
        }
        return { tensor, center, scale };
    }
    /**
     * Decode SimCC classification outputs: per keypoint, take the argmax
     * along each 1-D axis, score = mean of the two peak values, and map
     * normalized coordinates back through the crop center/scale.
     * @param shapeX dims of simcc_x, [batch, numKeypoints, binsX]
     * @param shapeY dims of simcc_y, [batch, numKeypoints, binsY]
     */
    postprocessPose(simccX, simccY, shapeX, shapeY, center, scale) {
        const numKeypoints = shapeX[1];
        const wx = shapeX[2];
        const wy = shapeY[2];
        const keypoints = [];
        for (let k = 0; k < numKeypoints; k++) {
            // Argmax over the x-axis bins for keypoint k.
            let maxX = -Infinity, argmaxX = 0;
            for (let i = 0; i < wx; i++) {
                const val = simccX[k * wx + i];
                if (val > maxX) {
                    maxX = val;
                    argmaxX = i;
                }
            }
            // Argmax over the y-axis bins for keypoint k.
            let maxY = -Infinity, argmaxY = 0;
            for (let i = 0; i < wy; i++) {
                const val = simccY[k * wy + i];
                if (val > maxY) {
                    maxY = val;
                    argmaxY = i;
                }
            }
            const score = 0.5 * (maxX + maxY);
            const visible = score > this.config.poseConfidence;
            // Normalized bin position -> offset from crop center -> image coords.
            const normX = argmaxX / wx;
            const normY = argmaxY / wy;
            const x = (normX - 0.5) * scale[0] + center[0];
            const y = (normY - 0.5) * scale[1] + center[1];
            keypoints.push({
                x,
                y,
                score,
                visible,
                name: KEYPOINT_NAMES[k] || `keypoint_${k}`,
            });
        }
        return keypoints;
    }
    /**
     * Greedy non-maximum suppression over all detections.
     * NOTE(review): class-agnostic — overlapping boxes of *different*
     * classes suppress each other; confirm this is intended vs per-class
     * NMS. Sorts the input array in place (descending confidence).
     */
    applyNMS(detections, iouThreshold) {
        if (detections.length === 0)
            return [];
        detections.sort((a, b) => b.bbox.confidence - a.bbox.confidence);
        const selected = [];
        const used = new Set();
        for (let i = 0; i < detections.length; i++) {
            if (used.has(i))
                continue;
            selected.push(detections[i]);
            used.add(i);
            for (let j = i + 1; j < detections.length; j++) {
                if (used.has(j))
                    continue;
                const iou = this.calculateIoU(detections[i].bbox, detections[j].bbox);
                if (iou > iouThreshold) {
                    used.add(j);
                }
            }
        }
        return selected;
    }
    /**
     * Intersection-over-union of two {x1, y1, x2, y2} boxes.
     * Returns 0 when the boxes do not overlap.
     */
    calculateIoU(box1, box2) {
        const x1 = Math.max(box1.x1, box2.x1);
        const y1 = Math.max(box1.y1, box2.y1);
        const x2 = Math.min(box1.x2, box2.x2);
        const y2 = Math.min(box1.y2, box2.y2);
        if (x2 <= x1 || y2 <= y1)
            return 0;
        const intersection = (x2 - x1) * (y2 - y1);
        const area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
        const area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
        const union = area1 + area2 - intersection;
        return intersection / union;
    }
    /**
     * Dispose resources. Releases both ONNX sessions and resets the
     * initialized flag; canvases/buffers are left for GC.
     */
    dispose() {
        if (this.detSession) {
            this.detSession.release();
            this.detSession = null;
        }
        if (this.poseSession) {
            this.poseSession.release();
            this.poseSession = null;
        }
        this.initialized = false;
    }
}
/**
 * Body solution - body pose estimation with 17 or 26 keypoints.
 * Type declarations only (generated .d.ts for src/solution/body.ts).
 */
import { ModeType } from '../types/index';
export declare class Body {
    private detModel;
    private poseModel;
    // Per-mode model defaults (initializer lives in the implementation file).
    private static readonly MODE;
    constructor(det?: string | null, detInputSize?: [number, number], pose?: string | null, poseInputSize?: [number, number], mode?: ModeType, toOpenpose?: boolean, backend?: 'onnxruntime', device?: string);
    init(): Promise<void>;
    // Runs detection + pose on raw RGBA pixel data of the given dimensions.
    call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<{
        keypoints: number[][];
        scores: number[];
    }>;
}
//# sourceMappingURL=body.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"body.d.ts","sourceRoot":"","sources":["../../src/solution/body.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAQ,QAAQ,EAAe,MAAM,gBAAgB,CAAC;AAE7D,qBAAa,IAAI;IACf,OAAO,CAAC,QAAQ,CAAQ;IACxB,OAAO,CAAC,SAAS,CAAU;IAE3B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAmB1B;gBAGA,GAAG,GAAE,MAAM,GAAG,IAAW,EACzB,YAAY,GAAE,CAAC,MAAM,EAAE,MAAM,CAAc,EAC3C,IAAI,GAAE,MAAM,GAAG,IAAW,EAC1B,aAAa,GAAE,CAAC,MAAM,EAAE,MAAM,CAAc,EAC5C,IAAI,GAAE,QAAqB,EAC3B,UAAU,GAAE,OAAe,EAC3B,OAAO,GAAE,aAA6B,EACtC,MAAM,GAAE,MAAc;IAiClB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAKrB,IAAI,CACR,KAAK,EAAE,UAAU,EACjB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;CAKxD"}