rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,184 @@
1
+ /**
2
+ * RTMPose3D model for 3D pose estimation
3
+ * Extends RTMPose with Z-axis prediction
4
+ * Based on rtmlib RTMPose3d class
5
+ */
6
+ import { BaseTool } from '../core/base';
7
+ export class RTMPose3D extends BaseTool {
8
+ constructor(onnxModel, modelInputSize = [288, 384], // [width=288, height=384] - creates tensor [1,3,384,288]
9
+ toOpenpose = false, backend = 'webgpu', zRange) {
10
+ super(onnxModel, modelInputSize, null, null, backend);
11
+ this.simccSplitRatio = 2.0;
12
+ this.zRange = 2.1744869;
13
+ this.initialized = false;
14
+ this.defaultMean = [123.675, 116.28, 103.53];
15
+ this.defaultStd = [58.395, 57.12, 57.375];
16
+ this.toOpenpose = toOpenpose;
17
+ if (zRange !== undefined) {
18
+ this.zRange = zRange;
19
+ }
20
+ }
21
+ async init() {
22
+ await super.init();
23
+ this.initialized = true;
24
+ }
25
+ async call(image, imgWidth, imgHeight, bboxes = []) {
26
+ if (!this.initialized) {
27
+ await this.init();
28
+ }
29
+ if (bboxes.length === 0) {
30
+ bboxes = [{ x1: 0, y1: 0, x2: imgWidth, y2: imgHeight }];
31
+ }
32
+ const allKeypoints = [];
33
+ const allScores = [];
34
+ const allKeypointsSimcc = [];
35
+ const allKeypoints2d = [];
36
+ for (const bbox of bboxes) {
37
+ const { tensor, center, scale, inputSize } = this.preprocess(image, imgWidth, imgHeight, bbox);
38
+ const outputs = await this.inference(tensor, inputSize);
39
+ const { keypoints, scores, keypointsSimcc, keypoints2d } = this.postprocess(outputs[0].data, outputs[1].data, outputs[2].data, outputs[0].dims, outputs[1].dims, outputs[2].dims, center, scale);
40
+ allKeypoints.push(keypoints);
41
+ allScores.push(scores);
42
+ allKeypointsSimcc.push(keypointsSimcc);
43
+ allKeypoints2d.push(keypoints2d);
44
+ }
45
+ return {
46
+ keypoints: allKeypoints,
47
+ scores: allScores,
48
+ keypointsSimcc: allKeypointsSimcc,
49
+ keypoints2d: allKeypoints2d,
50
+ };
51
+ }
52
+ preprocess(img, imgWidth, imgHeight, bbox) {
53
+ const [inputH, inputW] = this.modelInputSize;
54
+ // Center and scale from bbox with padding (1.25 as in Python)
55
+ const center = [
56
+ bbox.x1 + (bbox.x2 - bbox.x1) / 2,
57
+ bbox.y1 + (bbox.y2 - bbox.y1) / 2,
58
+ ];
59
+ const bboxWidth = bbox.x2 - bbox.x1;
60
+ const bboxHeight = bbox.y2 - bbox.y1;
61
+ const padding = 1.25;
62
+ // Adjust scale to maintain aspect ratio
63
+ const aspectRatio = inputW / inputH;
64
+ const bboxAspectRatio = bboxWidth / bboxHeight;
65
+ let scaleW, scaleH;
66
+ if (bboxAspectRatio > aspectRatio) {
67
+ scaleW = bboxWidth * padding;
68
+ scaleH = scaleW / aspectRatio;
69
+ }
70
+ else {
71
+ scaleH = bboxHeight * padding;
72
+ scaleW = scaleH * aspectRatio;
73
+ }
74
+ const scale = [scaleW, scaleH];
75
+ // Create canvas for cropping
76
+ const canvas = document.createElement('canvas');
77
+ const ctx = canvas.getContext('2d');
78
+ canvas.width = inputW;
79
+ canvas.height = inputH;
80
+ ctx.fillStyle = '#FFFFFF';
81
+ ctx.fillRect(0, 0, inputW, inputH);
82
+ // Create source canvas from image data
83
+ const srcCanvas = document.createElement('canvas');
84
+ const srcCtx = srcCanvas.getContext('2d');
85
+ srcCanvas.width = imgWidth;
86
+ srcCanvas.height = imgHeight;
87
+ const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
88
+ srcImageData.data.set(img);
89
+ srcCtx.putImageData(srcImageData, 0, 0);
90
+ // Calculate source region
91
+ const srcX = center[0] - scaleW / 2;
92
+ const srcY = center[1] - scaleH / 2;
93
+ // Draw cropped and scaled region using warpAffine-like transformation
94
+ this.warpAffine(ctx, srcCanvas, center, scale, inputW, inputH, srcX, srcY);
95
+ const imageData = ctx.getImageData(0, 0, inputW, inputH);
96
+ // Normalize with mean/std
97
+ const data = new Float32Array(inputW * inputH * 3);
98
+ for (let i = 0; i < imageData.data.length; i += 4) {
99
+ const pixelIndex = i / 4;
100
+ for (let c = 0; c < 3; c++) {
101
+ const value = imageData.data[i + c];
102
+ data[c * inputW * inputH + pixelIndex] =
103
+ (value - this.defaultMean[c]) / this.defaultStd[c];
104
+ }
105
+ }
106
+ return {
107
+ tensor: data,
108
+ center,
109
+ scale,
110
+ inputSize: [inputH, inputW],
111
+ };
112
+ }
113
+ warpAffine(ctx, srcCanvas, center, scale, dstWidth, dstHeight, srcX, srcY) {
114
+ // Simple affine transform using canvas drawImage
115
+ // For more accurate transformation, OpenCV bindings would be needed
116
+ ctx.drawImage(srcCanvas, srcX, srcY, scale[0], scale[1], 0, 0, dstWidth, dstHeight);
117
+ }
118
+ postprocess(simccX, simccY, simccZ, outputShapeX, outputShapeY, outputShapeZ, center, scale) {
119
+ const numKeypoints = outputShapeX[1];
120
+ const wx = outputShapeX[2];
121
+ const wy = outputShapeY[2];
122
+ const wz = outputShapeZ[2];
123
+ const keypoints = [];
124
+ const scores = [];
125
+ const keypointsSimcc = [];
126
+ const keypoints2d = [];
127
+ for (let k = 0; k < numKeypoints; k++) {
128
+ // Find argmax for x
129
+ let maxX = -Infinity;
130
+ let argmaxX = 0;
131
+ for (let i = 0; i < wx; i++) {
132
+ const val = simccX[k * wx + i];
133
+ if (val > maxX) {
134
+ maxX = val;
135
+ argmaxX = i;
136
+ }
137
+ }
138
+ // Find argmax for y
139
+ let maxY = -Infinity;
140
+ let argmaxY = 0;
141
+ for (let i = 0; i < wy; i++) {
142
+ const val = simccY[k * wy + i];
143
+ if (val > maxY) {
144
+ maxY = val;
145
+ argmaxY = i;
146
+ }
147
+ }
148
+ // Find argmax for z
149
+ let maxZ = -Infinity;
150
+ let argmaxZ = 0;
151
+ for (let i = 0; i < wz; i++) {
152
+ const val = simccZ[k * wz + i];
153
+ if (val > maxZ) {
154
+ maxZ = val;
155
+ argmaxZ = i;
156
+ }
157
+ }
158
+ // Score is max of x and y (as in Python)
159
+ const score = maxX > maxY ? maxX : maxY;
160
+ // Normalize to [0, 1] and transform to original image coordinates
161
+ const normX = argmaxX / wx;
162
+ const normY = argmaxY / wy;
163
+ const normZ = argmaxZ / wz;
164
+ // Apply split ratio
165
+ const kptX = (normX - 0.5) * this.simccSplitRatio;
166
+ const kptY = (normY - 0.5) * this.simccSplitRatio;
167
+ const kptZ = (normZ - 0.5) * this.simccSplitRatio;
168
+ // Convert Z to metric scale
169
+ // Python uses model_input_size[-1] which is width (384) in (H, W) format
170
+ // TypeScript uses modelInputSize[0] which is width (288) in [W, H] format
171
+ const kptZMetric = (normZ / (this.modelInputSize[0] / 2) - 1) * this.zRange;
172
+ // 3D keypoint
173
+ keypoints.push([kptX, kptY, kptZMetric]);
174
+ // SimCC coordinates (normalized)
175
+ keypointsSimcc.push([normX, normY, normZ]);
176
+ // 2D keypoint in original image coordinates
177
+ const kpt2dX = normX * scale[0] + center[0] - 0.5 * scale[0];
178
+ const kpt2dY = normY * scale[1] + center[1] - 0.5 * scale[1];
179
+ keypoints2d.push([kpt2dX, kpt2dY]);
180
+ scores.push(score);
181
+ }
182
+ return { keypoints, scores, keypointsSimcc, keypoints2d };
183
+ }
184
+ }
@@ -0,0 +1,23 @@
1
+ /**
2
+ * YOLO12 object detection model
3
+ * Based on YOLO12 architecture for person detection
4
+ * Compatible with Ultralytics YOLOv12 ONNX export
5
+ * Uses onnxruntime-web for inference
6
+ */
7
+ import { BaseTool } from '../core/base';
8
+ import { Detection, BackendType } from '../types/index';
9
+ export declare class YOLO12 extends BaseTool {
10
+ private nmsThr;
11
+ scoreThr: number;
12
+ private initialized;
13
+ private paddingX;
14
+ private paddingY;
15
+ private scaleX;
16
+ private scaleY;
17
+ constructor(modelPath: string, modelInputSize?: [number, number], nmsThr?: number, scoreThr?: number, backend?: BackendType);
18
+ init(): Promise<void>;
19
+ call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<Detection[]>;
20
+ private preprocess;
21
+ private applyNms;
22
+ }
23
+ //# sourceMappingURL=yolo12.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yolo12.d.ts","sourceRoot":"","sources":["../../src/models/yolo12.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAQ,SAAS,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE9D,qBAAa,MAAO,SAAQ,QAAQ;IAClC,OAAO,CAAC,MAAM,CAAS;IAChB,QAAQ,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,WAAW,CAAkB;IACrC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,MAAM,CAAa;gBAGzB,SAAS,EAAE,MAAM,EACjB,cAAc,GAAE,CAAC,MAAM,EAAE,MAAM,CAAc,EAC7C,MAAM,GAAE,MAAa,EACrB,QAAQ,GAAE,MAAY,EACtB,OAAO,GAAE,WAAsB;IAO3B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAKrB,IAAI,CACR,KAAK,EAAE,UAAU,EACjB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,SAAS,EAAE,CAAC;IA8DvB,OAAO,CAAC,UAAU;IAoElB,OAAO,CAAC,QAAQ;CAiDjB"}
@@ -0,0 +1,165 @@
1
+ /**
2
+ * YOLO12 object detection model
3
+ * Based on YOLO12 architecture for person detection
4
+ * Compatible with Ultralytics YOLOv12 ONNX export
5
+ * Uses onnxruntime-web for inference
6
+ */
7
+ import { BaseTool } from '../core/base';
8
export class YOLO12 extends BaseTool {
    /**
     * YOLO12 person detector for Ultralytics YOLOv12 ONNX exports whose
     * output rows are [x1, y1, x2, y2, score, class_id].
     *
     * @param {string} modelPath - ONNX model path/URL forwarded to BaseTool.
     * @param {[number, number]} [modelInputSize=[640, 640]] - Model input size.
     * @param {number} [nmsThr=0.45] - IoU threshold for the NMS pass.
     * @param {number} [scoreThr=0.5] - Minimum confidence to keep a detection.
     * @param {string} [backend='webgpu'] - Inference backend for BaseTool.
     */
    constructor(modelPath, modelInputSize = [640, 640], nmsThr = 0.45, scoreThr = 0.5, backend = 'webgpu') {
        super(modelPath, modelInputSize, null, null, backend);
        this.initialized = false;
        // Letterbox state written by preprocess() and read back in call()
        // to map padded-space boxes to source-image coordinates.
        this.paddingX = 0;
        this.paddingY = 0;
        this.scaleX = 1;
        this.scaleY = 1;
        this.nmsThr = nmsThr;
        this.scoreThr = scoreThr;
    }
    /** Creates the ONNX session via BaseTool.init() and marks the tool ready. */
    async init() {
        await super.init();
        this.initialized = true;
    }
    /**
     * Detects persons (COCO class 0) in an RGB pixel buffer.
     *
     * @param {Uint8Array} image - RGB buffer, length imgWidth * imgHeight * 3.
     * @param {number} imgWidth - Source image width in pixels.
     * @param {number} imgHeight - Source image height in pixels.
     * @returns {Promise<Array<{bbox: {x1,y1,x2,y2}, score: number, classId: number}>>}
     *     NMS-filtered detections in source-image coordinates.
     */
    async call(image, imgWidth, imgHeight) {
        if (!this.initialized) {
            await this.init();
        }
        const { paddedImg } = this.preprocess(image, imgWidth, imgHeight);
        const outputs = await this.inference(paddedImg);
        // YOLO12 output format: [1, num_boxes, 6] where 6 = [x1, y1, x2, y2, score, class_id]
        const detOutput = outputs[0];
        const detShape = detOutput.dims;
        if (detShape.length !== 3 || detShape[2] !== 6 || detOutput.type !== 'float32') {
            console.error(`YOLO12: Unexpected output shape [${detShape}] or type ${detOutput.type}`);
            return [];
        }
        const detArray = detOutput.data;
        const numBoxes = detShape[1];
        const detections = [];
        for (let i = 0; i < numBoxes; i++) {
            const baseIdx = i * 6;
            const x1 = detArray[baseIdx];
            const y1 = detArray[baseIdx + 1];
            const x2 = detArray[baseIdx + 2];
            const y2 = detArray[baseIdx + 3];
            const score = detArray[baseIdx + 4];
            const classId = detArray[baseIdx + 5];
            // Filter by score threshold and class (0 = person in COCO)
            if (score < this.scoreThr || classId !== 0) {
                continue;
            }
            // Undo the letterbox: remove padding, then scale back to source pixels.
            const transformedX1 = (x1 - this.paddingX) * this.scaleX;
            const transformedY1 = (y1 - this.paddingY) * this.scaleY;
            const transformedX2 = (x2 - this.paddingX) * this.scaleX;
            const transformedY2 = (y2 - this.paddingY) * this.scaleY;
            // Discard degenerate (zero/negative area) boxes.
            if (transformedX1 >= transformedX2 || transformedY1 >= transformedY2) {
                continue;
            }
            detections.push({
                bbox: {
                    x1: Math.max(0, transformedX1),
                    y1: Math.max(0, transformedY1),
                    x2: Math.min(imgWidth, transformedX2),
                    y2: Math.min(imgHeight, transformedY2),
                },
                score,
                classId: Math.round(classId),
            });
        }
        // Apply NMS
        return this.applyNms(detections, this.nmsThr);
    }
    /**
     * Letterboxes the RGB buffer into the model input (black padding,
     * aspect ratio preserved, nearest-neighbor resize), normalizes to
     * [0, 1], and transposes HWC -> CHW.
     *
     * Side effects: records paddingX/paddingY/scaleX/scaleY on `this` for
     * the inverse mapping performed in call().
     *
     * @returns {{paddedImg: Float32Array, ratio: number}}
     */
    preprocess(img, imgWidth, imgHeight) {
        const [inputH, inputW] = this.modelInputSize;
        // Model-input-sized buffer, black (0) padding.
        const paddedImg = new Uint8Array(inputH * inputW * 3).fill(0);
        // Fit the image inside the input while preserving aspect ratio.
        const aspectRatio = imgWidth / imgHeight;
        const targetAspectRatio = inputW / inputH;
        let drawWidth, drawHeight;
        if (aspectRatio > targetAspectRatio) {
            // Image is wider - fit to width, add padding top/bottom
            drawWidth = inputW;
            drawHeight = Math.floor(inputW / aspectRatio);
            this.paddingX = 0;
            this.paddingY = (inputH - drawHeight) / 2;
        }
        else {
            // Image is taller - fit to height, add padding left/right
            drawHeight = inputH;
            drawWidth = Math.floor(inputH * aspectRatio);
            this.paddingX = (inputW - drawWidth) / 2;
            this.paddingY = 0;
        }
        // Scale factors from resized back to source pixels.
        this.scaleX = imgWidth / drawWidth;
        this.scaleY = imgHeight / drawHeight;
        // Nearest-neighbor resize into the padded buffer.
        for (let y = 0; y < drawHeight; y++) {
            for (let x = 0; x < drawWidth; x++) {
                const srcX = Math.floor(x * this.scaleX);
                const srcY = Math.floor(y * this.scaleY);
                const dstX = Math.floor(x + this.paddingX);
                const dstY = Math.floor(y + this.paddingY);
                for (let c = 0; c < 3; c++) {
                    paddedImg[(dstY * inputW + dstX) * 3 + c] = img[(srcY * imgWidth + srcX) * 3 + c];
                }
            }
        }
        // Normalize to [0, 1] and convert to float32
        const floatImg = new Float32Array(paddedImg.length);
        for (let i = 0; i < paddedImg.length; i++) {
            floatImg[i] = paddedImg[i] / 255.0;
        }
        // Transpose HWC to CHW
        const transposed = new Float32Array(3 * inputH * inputW);
        for (let c = 0; c < 3; c++) {
            for (let h = 0; h < inputH; h++) {
                for (let w = 0; w < inputW; w++) {
                    transposed[c * inputH * inputW + h * inputW + w] =
                        floatImg[h * inputW * 3 + w * 3 + c];
                }
            }
        }
        return { paddedImg: transposed, ratio: 1 };
    }
    /**
     * Greedy non-maximum suppression over score-sorted detections.
     *
     * Bug fix: the previous implementation suppressed neighbors with
     * `iou <= iouThreshold`, i.e. it kept overlapping duplicates and
     * discarded non-overlapping boxes. Standard NMS suppresses neighbors
     * whose IoU EXCEEDS the threshold.
     *
     * Note: mutates `detections` order (sorts in place by score descending).
     *
     * @param {Array} detections - Detections with {bbox, score}.
     * @param {number} iouThreshold - Overlap above which a box is suppressed.
     * @returns {Array} Surviving detections, highest score first.
     */
    applyNms(detections, iouThreshold) {
        if (detections.length === 0) {
            return [];
        }
        // Sort by score descending
        detections.sort((a, b) => b.score - a.score);
        const selected = [];
        const used = new Array(detections.length).fill(false);
        for (let i = 0; i < detections.length; i++) {
            if (used[i]) {
                continue;
            }
            selected.push(detections[i]);
            used[i] = true;
            const boxA = detections[i].bbox;
            for (let j = i + 1; j < detections.length; j++) {
                if (used[j]) {
                    continue;
                }
                const boxB = detections[j].bbox;
                // Intersection-over-union of boxA and boxB.
                const x1 = Math.max(boxA.x1, boxB.x1);
                const y1 = Math.max(boxA.y1, boxB.y1);
                const x2 = Math.min(boxA.x2, boxB.x2);
                const y2 = Math.min(boxA.y2, boxB.y2);
                const intersection = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
                const areaA = (boxA.x2 - boxA.x1) * (boxA.y2 - boxA.y1);
                const areaB = (boxB.x2 - boxB.x1) * (boxB.y2 - boxB.y1);
                const union = areaA + areaB - intersection;
                const iou = union > 0 ? intersection / union : 0;
                // Suppress j only when it overlaps the kept box too much.
                if (iou > iouThreshold) {
                    used[j] = true;
                }
            }
        }
        return selected;
    }
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * YOLOX object detection model
3
+ * Based on https://github.com/IDEA-Research/DWPose/blob/opencv_onnx/ControlNet-v1-1-nightly/annotator/dwpose/cv_ox_det.py
4
+ */
5
+ import { BaseTool } from '../core/base';
6
+ import { BBox, BackendType } from '../types/index';
7
+ export declare class YOLOX extends BaseTool {
8
+ private nmsThr;
9
+ scoreThr: number;
10
+ private initialized;
11
+ constructor(onnxModel: string, modelInputSize?: [number, number], nmsThr?: number, scoreThr?: number, // Lower default threshold
12
+ backend?: BackendType);
13
+ init(): Promise<void>;
14
+ call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<BBox[]>;
15
+ private preprocess;
16
+ private postprocess;
17
+ }
18
+ //# sourceMappingURL=yolox.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yolox.d.ts","sourceRoot":"","sources":["../../src/models/yolox.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AAExC,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAEnD,qBAAa,KAAM,SAAQ,QAAQ;IACjC,OAAO,CAAC,MAAM,CAAS;IAChB,QAAQ,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,WAAW,CAAkB;gBAGnC,SAAS,EAAE,MAAM,EACjB,cAAc,GAAE,CAAC,MAAM,EAAE,MAAM,CAAc,EAC7C,MAAM,GAAE,MAAa,EACrB,QAAQ,GAAE,MAAY,EAAG,0BAA0B;IACnD,OAAO,GAAE,WAAsB;IAO3B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMrB,IAAI,CAAC,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAoEnF,OAAO,CAAC,UAAU;IA0DlB,OAAO,CAAC,WAAW;CAuDpB"}
@@ -0,0 +1,167 @@
1
+ /**
2
+ * YOLOX object detection model
3
+ * Based on https://github.com/IDEA-Research/DWPose/blob/opencv_onnx/ControlNet-v1-1-nightly/annotator/dwpose/cv_ox_det.py
4
+ */
5
+ import { BaseTool } from '../core/base';
6
export class YOLOX extends BaseTool {
    /**
     * YOLOX person detector for end2end ONNX exports with in-graph NMS
     * (primary output: [1, num_dets, 5] rows of [x1, y1, x2, y2, score]).
     *
     * @param {string} onnxModel - ONNX model path/URL forwarded to BaseTool.
     * @param {[number, number]} [modelInputSize=[640, 640]] - Model input size.
     * @param {number} [nmsThr=0.45] - Stored IoU threshold; the end2end export
     *     already applies NMS in-graph, so it is not used in this path.
     * @param {number} [scoreThr=0.3] - Minimum confidence to keep a detection.
     * @param {string} [backend='webgpu'] - Inference backend for BaseTool.
     */
    constructor(onnxModel, modelInputSize = [640, 640], nmsThr = 0.45, scoreThr = 0.3, // Lower default threshold
    backend = 'webgpu') {
        super(onnxModel, modelInputSize, null, null, backend);
        this.initialized = false;
        this.nmsThr = nmsThr;
        this.scoreThr = scoreThr;
    }
    /** Creates the ONNX session via BaseTool.init() and marks the tool ready. */
    async init() {
        // Web version - model path is direct URL
        await super.init();
        this.initialized = true;
    }
    /**
     * Detects persons in an RGB pixel buffer.
     *
     * Fixes over the previous revision:
     * - honors the configured `this.scoreThr` instead of a hard-coded 0.3
     *   (the `scoreThr` constructor parameter was silently ignored);
     * - removes leftover per-detection console.log debug spam.
     * Default behavior is unchanged (default scoreThr is still 0.3).
     *
     * @param {Uint8Array} image - RGB buffer, length imgWidth * imgHeight * 3.
     * @param {number} imgWidth - Source image width in pixels.
     * @param {number} imgHeight - Source image height in pixels.
     * @returns {Promise<Array<{x1,y1,x2,y2}>>} Boxes in source-image pixels;
     *     empty array when the output shape is not the expected end2end form.
     */
    async call(image, imgWidth, imgHeight) {
        if (!this.initialized) {
            await this.init();
        }
        const { paddedImg, ratio } = this.preprocess(image, imgWidth, imgHeight);
        const outputs = await this.inference(paddedImg);
        // For end2end YOLOX with built-in NMS:
        // Output 0: [1, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
        // Output 1: [1, num_dets] or [1, 1] with count
        const detOutput = outputs[0];
        const detShape = detOutput.dims; // [1, num_dets, 5]
        if (detShape.length === 3 && detShape[2] === 5 && detOutput.type === 'float32') {
            const detArray = detOutput.data;
            const numDets = detShape[1];
            const boxes = [];
            for (let i = 0; i < numDets; i++) {
                const baseIdx = i * 5;
                const score = detArray[baseIdx + 4];
                // Honor the configured threshold (was hard-coded to 0.3).
                if (score <= this.scoreThr) {
                    continue;
                }
                // Undo the letterbox scaling applied in preprocess().
                const x1 = detArray[baseIdx] / ratio;
                const y1 = detArray[baseIdx + 1] / ratio;
                const x2 = detArray[baseIdx + 2] / ratio;
                const y2 = detArray[baseIdx + 3] / ratio;
                // Discard degenerate (zero/negative area) boxes.
                if (x2 > x1 && y2 > y1) {
                    boxes.push({ x1, y1, x2, y2 });
                }
            }
            return boxes;
        }
        // Unexpected output layout — signal "no detections" rather than throwing.
        return [];
    }
    /**
     * Letterboxes the RGB buffer into the model input (gray 114 padding, as
     * in the official YOLOX preprocessing), swaps RGB -> BGR, normalizes to
     * [0, 1], and transposes HWC -> CHW.
     *
     * @returns {{paddedImg: Float32Array, ratio: number}} ratio maps source
     *     pixels to model-input pixels (boxes are divided by it afterwards).
     */
    preprocess(img, imgWidth, imgHeight) {
        const [inputH, inputW] = this.modelInputSize;
        let paddedImg;
        let ratio;
        if (imgHeight === inputH && imgWidth === inputW) {
            // Already at model size — skip the resize entirely.
            paddedImg = img;
            ratio = 1.0;
        }
        else {
            paddedImg = new Uint8Array(inputH * inputW * 3).fill(114);
            ratio = Math.min(inputH / imgHeight, inputW / imgWidth);
            const resizedW = Math.floor(imgWidth * ratio);
            const resizedH = Math.floor(imgHeight * ratio);
            // Resize image (simple nearest neighbor for now)
            for (let y = 0; y < resizedH; y++) {
                for (let x = 0; x < resizedW; x++) {
                    const srcX = Math.floor(x / ratio);
                    const srcY = Math.floor(y / ratio);
                    for (let c = 0; c < 3; c++) {
                        paddedImg[(y * inputW + x) * 3 + c] = img[(srcY * imgWidth + srcX) * 3 + c];
                    }
                }
            }
        }
        // YOLOX uses simple [0, 1] normalization; channels are swapped to BGR
        // to match the OpenCV-based reference pipeline.
        const floatImg = new Float32Array(paddedImg.length);
        for (let i = 0; i < paddedImg.length; i += 3) {
            floatImg[i] = paddedImg[i + 2] / 255.0; // B
            floatImg[i + 1] = paddedImg[i + 1] / 255.0; // G
            floatImg[i + 2] = paddedImg[i] / 255.0; // R
        }
        // Transpose HWC to CHW
        const transposed = new Float32Array(inputH * inputW * 3);
        for (let c = 0; c < 3; c++) {
            for (let h = 0; h < inputH; h++) {
                for (let w = 0; w < inputW; w++) {
                    transposed[c * inputH * inputW + h * inputW + w] = floatImg[h * inputW * 3 + w * 3 + c];
                }
            }
        }
        return { paddedImg: transposed, ratio };
    }
    /**
     * Decodes a generic [1, num_boxes, 5|6] detection output. Not used by
     * call() (which handles the end2end 5-column case inline) but kept for
     * exports with a class column.
     *
     * Fixes over the previous revision:
     * - boxes are clamped to the un-letterboxed image extent
     *   (modelInputSize / ratio); previously the clamp used
     *   outputShape[1] * ratio and outputShape[0] * ratio — i.e. the
     *   detection COUNT and the BATCH size — which are meaningless bounds;
     * - removes leftover console.log debug spam.
     *
     * @param {{data: ArrayLike<number>, dims: number[]}} outputs - Raw tensor.
     * @param {number} ratio - Letterbox ratio from preprocess().
     * @returns {Array<{x1,y1,x2,y2}>} Person boxes in source-image pixels.
     */
    postprocess(outputs, ratio) {
        const outputArray = new Float32Array(outputs.data);
        const outputShape = outputs.dims;
        // outputShape: [1, num_boxes, 5] or [1, num_boxes, 6]
        // For YOLOX with NMS: [batch, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
        if (outputShape.length === 3 && outputShape[2] >= 5) {
            const numBoxes = outputShape[1];
            const boxes = [];
            const hasClassInfo = outputShape[2] >= 6;
            // Valid coordinate extent in source-image pixels: the model input
            // un-scaled by the letterbox ratio (matches the bounds check below).
            const maxX = this.modelInputSize[1] / ratio;
            const maxY = this.modelInputSize[0] / ratio;
            for (let i = 0; i < numBoxes; i++) {
                const baseIdx = i * outputShape[2];
                const score = outputArray[baseIdx + 4];
                // Filter by score threshold
                if (score < this.scoreThr)
                    continue;
                // Check class if available
                if (hasClassInfo) {
                    const classId = outputArray[baseIdx + 5];
                    if (classId !== 0)
                        continue; // Only person class
                }
                const x1 = outputArray[baseIdx] / ratio;
                const y1 = outputArray[baseIdx + 1] / ratio;
                const x2 = outputArray[baseIdx + 2] / ratio;
                const y2 = outputArray[baseIdx + 3] / ratio;
                // Validate box coordinates
                if (x1 >= x2 || y1 >= y2)
                    continue;
                if (x2 < 0 || y2 < 0 || x1 > maxX || y1 > maxY)
                    continue;
                boxes.push({
                    x1: Math.max(0, x1),
                    y1: Math.max(0, y1),
                    x2: Math.min(maxX, x2),
                    y2: Math.min(maxY, y2),
                });
            }
            return boxes;
        }
        return [];
    }
}