rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,289 @@
1
+ /**
2
+ * RTMPose3D model for 3D pose estimation
3
+ * Extends RTMPose with Z-axis prediction
4
+ * Based on rtmlib RTMPose3d class
5
+ */
6
+
7
+ import { BaseTool } from '../core/base';
8
+ import { BBox, BackendType } from '../types/index';
9
+
10
/**
 * Top-down 3D pose estimator backed by an ONNX RTMPose3D model.
 *
 * For every bounding box the image is cropped and resized to the model input,
 * normalised with fixed per-channel mean/std, run through the model, and the
 * three SimCC outputs (x, y, z) are decoded with a per-keypoint argmax.
 *
 * `modelInputSize` follows the `[width, height]` convention used by the
 * constructor default (`[288, 384]` -> tensor `[1, 3, 384, 288]`).
 */
export class RTMPose3D extends BaseTool {
  // Stored from the constructor; not read anywhere inside this class.
  private toOpenpose: boolean;
  private simccSplitRatio: number = 2.0;
  private zRange: number = 2.1744869;
  private initialized: boolean = false;

  // Per-channel normalisation constants applied to the 0-255 pixel values.
  private readonly defaultMean: number[] = [123.675, 116.28, 103.53];
  private readonly defaultStd: number[] = [58.395, 57.12, 57.375];

  /**
   * @param onnxModel Model location, forwarded to `BaseTool`.
   * @param modelInputSize Model input as `[width, height]`.
   * @param toOpenpose Stored on the instance (unused by this class itself).
   * @param backend Execution backend, forwarded to `BaseTool`.
   * @param zRange Optional override for the Z scaling factor.
   */
  constructor(
    onnxModel: string,
    modelInputSize: [number, number] = [288, 384], // [width=288, height=384] - creates tensor [1,3,384,288]
    toOpenpose: boolean = false,
    backend: BackendType = 'webgpu',
    zRange?: number
  ) {
    super(onnxModel, modelInputSize, null, null, backend);
    this.toOpenpose = toOpenpose;
    if (zRange !== undefined) {
      this.zRange = zRange;
    }
  }

  /** Runs the base-class initialisation and marks this instance as ready. */
  async init(): Promise<void> {
    await super.init();
    this.initialized = true;
  }

  /**
   * Estimates keypoints for each bounding box, one inference per box.
   *
   * @param image Pixel buffer that is copied verbatim into a canvas
   *   `ImageData`, so it is expected to be RGBA (4 bytes per pixel).
   * @param imgWidth Width of `image` in pixels.
   * @param imgHeight Height of `image` in pixels.
   * @param bboxes Regions to analyse; when empty the whole image is used.
   * @returns Parallel arrays with one entry per bounding box:
   *   `keypoints` ([x, y, z] per keypoint), `scores`, `keypointsSimcc`
   *   (normalised argmax positions) and `keypoints2d` (image-space x/y).
   * @throws Error when the model yields fewer than three outputs.
   */
  async call(
    image: Uint8Array,
    imgWidth: number,
    imgHeight: number,
    bboxes: BBox[] = []
  ): Promise<{
    keypoints: number[][][];
    scores: number[][];
    keypointsSimcc: number[][][];
    keypoints2d: number[][][];
  }> {
    if (!this.initialized) {
      await this.init();
    }

    if (bboxes.length === 0) {
      bboxes = [{ x1: 0, y1: 0, x2: imgWidth, y2: imgHeight }];
    }

    const allKeypoints: number[][][] = [];
    const allScores: number[][] = [];
    const allKeypointsSimcc: number[][][] = [];
    const allKeypoints2d: number[][][] = [];

    for (const bbox of bboxes) {
      const { tensor, center, scale, inputSize } = this.preprocess(
        image,
        imgWidth,
        imgHeight,
        bbox
      );

      const outputs = await this.inference(tensor, inputSize);
      if (outputs.length < 3) {
        throw new Error(
          `RTMPose3D: expected 3 SimCC outputs (x, y, z) but received ${outputs.length}`
        );
      }

      const { keypoints, scores, keypointsSimcc, keypoints2d } = this.postprocess(
        outputs[0].data as Float32Array,
        outputs[1].data as Float32Array,
        outputs[2].data as Float32Array,
        outputs[0].dims,
        outputs[1].dims,
        outputs[2].dims,
        center,
        scale
      );

      allKeypoints.push(keypoints);
      allScores.push(scores);
      allKeypointsSimcc.push(keypointsSimcc);
      allKeypoints2d.push(keypoints2d);
    }

    return {
      keypoints: allKeypoints,
      scores: allScores,
      keypointsSimcc: allKeypointsSimcc,
      keypoints2d: allKeypoints2d,
    };
  }

  /**
   * Crops the padded bounding box out of the image, resizes it to the model
   * input and converts it to a normalised planar (channel-first) float buffer.
   *
   * @returns The tensor data, the crop `center` and `scale` (both in source
   *   image pixels, `[x, y]` / `[width, height]`) and `inputSize` as
   *   `[height, width]`.
   * @throws Error when a 2D canvas context cannot be created.
   */
  private preprocess(
    img: Uint8Array,
    imgWidth: number,
    imgHeight: number,
    bbox: BBox
  ): {
    tensor: Float32Array;
    center: [number, number];
    scale: [number, number];
    inputSize: [number, number];
  } {
    // modelInputSize is [width, height] (see the constructor default), so the
    // width must be read first; reading it as [height, width] would build a
    // transposed crop.
    const [inputW, inputH] = this.modelInputSize;

    const bboxWidth = bbox.x2 - bbox.x1;
    const bboxHeight = bbox.y2 - bbox.y1;

    // Center and scale from bbox with padding (1.25 as in Python)
    const center: [number, number] = [
      bbox.x1 + bboxWidth / 2,
      bbox.y1 + bboxHeight / 2,
    ];
    const padding = 1.25;

    // Grow the shorter side so the crop has the same aspect ratio as the
    // model input and the resize does not distort the subject.
    const aspectRatio = inputW / inputH;
    const bboxAspectRatio = bboxWidth / bboxHeight;

    let scaleW: number, scaleH: number;
    if (bboxAspectRatio > aspectRatio) {
      scaleW = bboxWidth * padding;
      scaleH = scaleW / aspectRatio;
    } else {
      scaleH = bboxHeight * padding;
      scaleW = scaleH * aspectRatio;
    }

    const scale: [number, number] = [scaleW, scaleH];

    // Creates a canvas of the given size and returns its 2D context.
    const makeContext = (width: number, height: number): CanvasRenderingContext2D => {
      const canvas = document.createElement('canvas');
      canvas.width = width;
      canvas.height = height;
      const context = canvas.getContext('2d');
      if (context === null) {
        throw new Error('RTMPose3D: 2D canvas context is not available');
      }
      return context;
    };

    // Destination canvas, pre-filled with white so that parts of the crop
    // falling outside the source image are white.
    const ctx = makeContext(inputW, inputH);
    ctx.fillStyle = '#FFFFFF';
    ctx.fillRect(0, 0, inputW, inputH);

    // Source canvas holding the full input image.
    const srcCtx = makeContext(imgWidth, imgHeight);
    const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
    srcImageData.data.set(img);
    srcCtx.putImageData(srcImageData, 0, 0);

    // Top-left corner of the crop in source image coordinates.
    const srcX = center[0] - scaleW / 2;
    const srcY = center[1] - scaleH / 2;

    // Draw cropped and scaled region using warpAffine-like transformation
    this.warpAffine(ctx, srcCtx.canvas, center, scale, inputW, inputH, srcX, srcY);

    const imageData = ctx.getImageData(0, 0, inputW, inputH);

    // Normalise with mean/std while converting interleaved RGBA to planar
    // RGB; the alpha channel is dropped.
    const planeSize = inputW * inputH;
    const data = new Float32Array(planeSize * 3);
    for (let i = 0; i < imageData.data.length; i += 4) {
      const pixelIndex = i / 4;
      for (let c = 0; c < 3; c++) {
        const value = imageData.data[i + c];
        data[c * planeSize + pixelIndex] =
          (value - this.defaultMean[c]) / this.defaultStd[c];
      }
    }

    return {
      tensor: data,
      center,
      scale,
      inputSize: [inputH, inputW],
    };
  }

  /**
   * Copies the `scale`-sized rectangle whose top-left corner is
   * (`srcX`, `srcY`) from `srcCanvas` onto the whole destination canvas.
   *
   * This is an axis-aligned crop + resize only (no rotation); `center` is
   * accepted for signature parity but is not used.
   */
  private warpAffine(
    ctx: CanvasRenderingContext2D,
    srcCanvas: HTMLCanvasElement,
    center: [number, number],
    scale: [number, number],
    dstWidth: number,
    dstHeight: number,
    srcX: number,
    srcY: number
  ): void {
    // Simple affine transform using canvas drawImage
    // For more accurate transformation, OpenCV bindings would be needed
    ctx.drawImage(srcCanvas, srcX, srcY, scale[0], scale[1], 0, 0, dstWidth, dstHeight);
  }

  /**
   * Decodes the three SimCC outputs of a single crop.
   *
   * Each output is read as `numKeypoints` consecutive rows of `dims[2]` bins
   * (only the first batch element is decoded).
   *
   * @returns `keypoints` as `[x, y, z]`, `scores`, `keypointsSimcc` as the
   *   normalised argmax positions and `keypoints2d` mapped back to source
   *   image coordinates through `center` / `scale`.
   */
  private postprocess(
    simccX: Float32Array,
    simccY: Float32Array,
    simccZ: Float32Array,
    outputShapeX: number[],
    outputShapeY: number[],
    outputShapeZ: number[],
    center: [number, number],
    scale: [number, number]
  ): {
    keypoints: number[][];
    scores: number[];
    keypointsSimcc: number[][];
    keypoints2d: number[][];
  } {
    const numKeypoints = outputShapeX[1];
    const wx = outputShapeX[2];
    const wy = outputShapeY[2];
    const wz = outputShapeZ[2];

    // Returns [index, value] of the largest bin in the given row. The first
    // maximum wins; an empty row yields [0, -Infinity].
    const peak = (simcc: Float32Array, row: number, bins: number): [number, number] => {
      const offset = row * bins;
      let best = -Infinity;
      let bestIndex = 0;
      for (let i = 0; i < bins; i++) {
        const val = simcc[offset + i];
        if (val > best) {
          best = val;
          bestIndex = i;
        }
      }
      return [bestIndex, best];
    };

    const keypoints: number[][] = [];
    const scores: number[] = [];
    const keypointsSimcc: number[][] = [];
    const keypoints2d: number[][] = [];

    for (let k = 0; k < numKeypoints; k++) {
      const [argmaxX, maxX] = peak(simccX, k, wx);
      const [argmaxY, maxY] = peak(simccY, k, wy);
      const [argmaxZ] = peak(simccZ, k, wz);

      // Score is the larger of the x and y peak responses.
      const score = maxX > maxY ? maxX : maxY;

      // Argmax positions normalised by the number of bins.
      const normX = argmaxX / wx;
      const normY = argmaxY / wy;
      const normZ = argmaxZ / wz;

      // Centre around zero and apply the split ratio.
      const kptX = (normX - 0.5) * this.simccSplitRatio;
      const kptY = (normY - 0.5) * this.simccSplitRatio;

      // Convert Z to metric scale using half of modelInputSize[0] (the input
      // width in the [W, H] convention) and zRange.
      // NOTE(review): normZ is already divided by the bin count, so dividing
      // it again by half the input width keeps this value very close to
      // -zRange - confirm the intended formula against the reference
      // implementation before relying on Z.
      const kptZMetric = (normZ / (this.modelInputSize[0] / 2) - 1) * this.zRange;

      // 3D keypoint
      keypoints.push([kptX, kptY, kptZMetric]);

      // SimCC coordinates (normalized)
      keypointsSimcc.push([normX, normY, normZ]);

      // 2D keypoint in original image coordinates
      const kpt2dX = normX * scale[0] + center[0] - 0.5 * scale[0];
      const kpt2dY = normY * scale[1] + center[1] - 0.5 * scale[1];
      keypoints2d.push([kpt2dX, kpt2dY]);

      scores.push(score);
    }

    return { keypoints, scores, keypointsSimcc, keypoints2d };
  }
}
@@ -0,0 +1,220 @@
1
+ /**
2
+ * YOLO12 object detection model
3
+ * Based on YOLO12 architecture for person detection
4
+ * Compatible with Ultralytics YOLOv12 ONNX export
5
+ * Uses onnxruntime-web for inference
6
+ */
7
+
8
+ import { BaseTool } from '../core/base';
9
+ import { BBox, Detection, BackendType } from '../types/index';
10
+
11
/**
 * Person detector backed by a YOLO12 ONNX model.
 *
 * The image is letterboxed onto a black canvas of the model input size,
 * scaled to [0, 1], run through the model, and the resulting boxes are mapped
 * back to source image coordinates, filtered and de-duplicated with NMS.
 */
export class YOLO12 extends BaseTool {
  private nmsThr: number;
  public scoreThr: number;
  private initialized: boolean = false;
  // Letterbox parameters of the most recent preprocess() call; call() uses
  // them to map model-space boxes back to source image coordinates.
  private paddingX: number = 0;
  private paddingY: number = 0;
  private scaleX: number = 1;
  private scaleY: number = 1;

  /**
   * @param modelPath Model location, forwarded to `BaseTool`.
   * @param modelInputSize Model input size, read as `[height, width]`.
   * @param nmsThr IoU above which a lower-scored box is suppressed.
   * @param scoreThr Minimum score for a detection to be kept.
   * @param backend Execution backend, forwarded to `BaseTool`.
   */
  constructor(
    modelPath: string,
    modelInputSize: [number, number] = [640, 640],
    nmsThr: number = 0.45,
    scoreThr: number = 0.5,
    backend: BackendType = 'webgpu'
  ) {
    super(modelPath, modelInputSize, null, null, backend);
    this.nmsThr = nmsThr;
    this.scoreThr = scoreThr;
  }

  /** Runs the base-class initialisation and marks this instance as ready. */
  async init(): Promise<void> {
    await super.init();
    this.initialized = true;
  }

  /**
   * Detects people in an image.
   *
   * @param image Pixel buffer read as tightly packed 3-channel data
   *   (3 bytes per pixel, row-major).
   * @param imgWidth Width of `image` in pixels.
   * @param imgHeight Height of `image` in pixels.
   * @returns Detections of class 0 with score >= `scoreThr`, clamped to the
   *   image bounds, after NMS. Returns an empty array (and logs an error)
   *   when the model output is not a float32 `[1, num_boxes, 6]` tensor.
   */
  async call(
    image: Uint8Array,
    imgWidth: number,
    imgHeight: number
  ): Promise<Detection[]> {
    if (!this.initialized) {
      await this.init();
    }

    const { paddedImg } = this.preprocess(image, imgWidth, imgHeight);
    const outputs = await this.inference(paddedImg);

    // YOLO12 output format: [1, num_boxes, 6] where 6 = [x1, y1, x2, y2, score, class_id]
    const detOutput = outputs[0];
    const detShape = detOutput.dims;

    if (detShape.length !== 3 || detShape[2] !== 6 || detOutput.type !== 'float32') {
      console.error(`YOLO12: Unexpected output shape [${detShape}] or type ${detOutput.type}`);
      return [];
    }

    const detArray = detOutput.data as Float32Array;
    const numBoxes = detShape[1];
    const detections: Detection[] = [];

    for (let i = 0; i < numBoxes; i++) {
      const baseIdx = i * 6;
      const x1 = detArray[baseIdx];
      const y1 = detArray[baseIdx + 1];
      const x2 = detArray[baseIdx + 2];
      const y2 = detArray[baseIdx + 3];
      const score = detArray[baseIdx + 4];
      const classId = detArray[baseIdx + 5];

      // Filter by score threshold and class (0 = person in COCO)
      if (score < this.scoreThr || classId !== 0) {
        continue;
      }

      // Transform from padded coordinates to original image coordinates
      const transformedX1 = (x1 - this.paddingX) * this.scaleX;
      const transformedY1 = (y1 - this.paddingY) * this.scaleY;
      const transformedX2 = (x2 - this.paddingX) * this.scaleX;
      const transformedY2 = (y2 - this.paddingY) * this.scaleY;

      // Skip boxes with non-positive width or height.
      if (transformedX1 >= transformedX2 || transformedY1 >= transformedY2) {
        continue;
      }

      detections.push({
        bbox: {
          x1: Math.max(0, transformedX1),
          y1: Math.max(0, transformedY1),
          x2: Math.min(imgWidth, transformedX2),
          y2: Math.min(imgHeight, transformedY2),
        },
        score,
        classId: Math.round(classId),
      });
    }

    // Apply NMS
    return this.applyNms(detections, this.nmsThr);
  }

  /**
   * Letterboxes the image into the model input and converts it to a planar
   * (channel-first) float buffer scaled to [0, 1].
   *
   * Side effect: updates `paddingX`, `paddingY`, `scaleX` and `scaleY`.
   *
   * @returns `paddedImg` (CHW float data) and `ratio`, which is always 1.
   */
  private preprocess(
    img: Uint8Array,
    imgWidth: number,
    imgHeight: number
  ): { paddedImg: Float32Array; ratio: number } {
    const [inputH, inputW] = this.modelInputSize;

    // Interleaved (HWC) canvas for the padded image; zero-filled = black.
    const padded = new Uint8Array(inputH * inputW * 3);

    // Calculate scaling and positioning to maintain aspect ratio
    const aspectRatio = imgWidth / imgHeight;
    const targetAspectRatio = inputW / inputH;

    let drawWidth: number, drawHeight: number;

    if (aspectRatio > targetAspectRatio) {
      // Image is wider - fit to width, add padding top/bottom.
      // Clamped to 1px so extreme aspect ratios cannot yield a zero-sized
      // draw area (which would make the scale factor infinite).
      drawWidth = inputW;
      drawHeight = Math.max(1, Math.floor(inputW / aspectRatio));
      this.paddingX = 0;
      this.paddingY = (inputH - drawHeight) / 2;
    } else {
      // Image is taller - fit to height, add padding left/right
      drawHeight = inputH;
      drawWidth = Math.max(1, Math.floor(inputH * aspectRatio));
      this.paddingX = (inputW - drawWidth) / 2;
      this.paddingY = 0;
    }

    // Factors that map letterboxed pixels back to source pixels.
    this.scaleX = imgWidth / drawWidth;
    this.scaleY = imgHeight / drawHeight;

    // Resize image onto padded canvas (nearest neighbor)
    for (let y = 0; y < drawHeight; y++) {
      const srcY = Math.floor(y * this.scaleY);
      const dstY = Math.floor(y + this.paddingY);

      for (let x = 0; x < drawWidth; x++) {
        const srcX = Math.floor(x * this.scaleX);
        const dstX = Math.floor(x + this.paddingX);

        const srcBase = (srcY * imgWidth + srcX) * 3;
        const dstBase = (dstY * inputW + dstX) * 3;
        for (let c = 0; c < 3; c++) {
          padded[dstBase + c] = img[srcBase + c];
        }
      }
    }

    // Normalize to [0, 1] and transpose HWC to CHW in a single pass.
    const planeSize = inputH * inputW;
    const transposed = new Float32Array(3 * planeSize);
    for (let p = 0; p < planeSize; p++) {
      for (let c = 0; c < 3; c++) {
        transposed[c * planeSize + p] = padded[p * 3 + c] / 255.0;
      }
    }

    return { paddedImg: transposed, ratio: 1 };
  }

  /**
   * Greedy non-maximum suppression.
   *
   * Detections are visited from highest to lowest score; each kept detection
   * suppresses every remaining one whose IoU with it exceeds `iouThreshold`.
   * The input array is not modified.
   *
   * @returns The kept detections, ordered by descending score.
   */
  private applyNms(detections: Detection[], iouThreshold: number): Detection[] {
    if (detections.length === 0) {
      return [];
    }

    // Sort a copy by score descending so the caller's array keeps its order.
    const ranked = [...detections].sort((a, b) => b.score - a.score);

    const selected: Detection[] = [];
    const suppressed: boolean[] = new Array<boolean>(ranked.length).fill(false);

    for (let i = 0; i < ranked.length; i++) {
      if (suppressed[i]) {
        continue;
      }

      selected.push(ranked[i]);

      const boxA = ranked[i].bbox;
      const areaA = (boxA.x2 - boxA.x1) * (boxA.y2 - boxA.y1);

      for (let j = i + 1; j < ranked.length; j++) {
        if (suppressed[j]) {
          continue;
        }

        const boxB = ranked[j].bbox;

        // Calculate IoU
        const x1 = Math.max(boxA.x1, boxB.x1);
        const y1 = Math.max(boxA.y1, boxB.y1);
        const x2 = Math.min(boxA.x2, boxB.x2);
        const y2 = Math.min(boxA.y2, boxB.y2);

        const intersection = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
        const areaB = (boxB.x2 - boxB.x1) * (boxB.y2 - boxB.y1);
        const union = areaA + areaB - intersection;

        const iou = union > 0 ? intersection / union : 0;

        // Heavily overlapping lower-scored boxes are duplicates of boxA.
        if (iou > iouThreshold) {
          suppressed[j] = true;
        }
      }
    }

    return selected;
  }
}