rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,214 @@
1
+ /**
2
+ * YOLOX object detection model
3
+ * Based on https://github.com/IDEA-Research/DWPose/blob/opencv_onnx/ControlNet-v1-1-nightly/annotator/dwpose/cv_ox_det.py
4
+ */
5
+
6
+ import { BaseTool } from '../core/base';
7
+ import { multiclassNms } from '../core/postprocessing';
8
+ import { BBox, BackendType } from '../types/index';
9
+
10
/**
 * YOLOX bounding-box detector running on top of {@link BaseTool}.
 *
 * The `call` path expects an end-to-end model whose first output is a
 * float32 tensor shaped `[1, num_dets, 5]` with rows
 * `[x1, y1, x2, y2, score]` in model-input coordinates; any other output
 * layout yields an empty result.
 */
export class YOLOX extends BaseTool {
  /** NMS threshold passed to the constructor. Stored, but not read by any method of this class. */
  private nmsThr: number;
  /** Confidence a detection must exceed to be returned by `call`. May be changed after construction. */
  public scoreThr: number;
  /** Set once `init()` has completed, so `call()` can initialise lazily. */
  private initialized: boolean = false;

  /**
   * @param onnxModel Model location handed to `BaseTool` (the web build treats it as a direct URL).
   * @param modelInputSize Model input size as `[height, width]`.
   * @param nmsThr NMS threshold (kept for API compatibility; unused by the end-to-end path).
   * @param scoreThr Minimum detection confidence.
   * @param backend Execution backend forwarded to `BaseTool`.
   */
  constructor(
    onnxModel: string,
    modelInputSize: [number, number] = [640, 640],
    nmsThr: number = 0.45,
    scoreThr: number = 0.3, // Lower default threshold
    backend: BackendType = 'webgpu'
  ) {
    super(onnxModel, modelInputSize, null, null, backend);
    this.nmsThr = nmsThr;
    this.scoreThr = scoreThr;
  }

  /** Initialises the underlying tool and marks this instance as ready. */
  async init(): Promise<void> {
    // Web version - model path is direct URL
    await super.init();
    this.initialized = true;
  }

  /**
   * Runs detection on one image.
   *
   * @param image Interleaved 3-channel pixel data (assumed RGB, `imgWidth * imgHeight * 3` bytes — confirm with callers).
   * @param imgWidth Image width in pixels.
   * @param imgHeight Image height in pixels.
   * @returns Boxes in original-image coordinates whose score exceeds `scoreThr`
   *          and whose extent is non-degenerate; `[]` when the model output
   *          does not have the expected `[1, num_dets, 5]` float32 layout.
   */
  async call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<BBox[]> {
    if (!this.initialized) {
      await this.init();
    }

    const { paddedImg, ratio } = this.preprocess(image, imgWidth, imgHeight);
    const outputs = await this.inference(paddedImg);

    console.log(`YOLOX: Got ${outputs.length} outputs`);
    for (let i = 0; i < outputs.length; i++) {
      console.log(` Output[${i}]: dims=[${outputs[i].dims}], type=${outputs[i].type}`);
    }

    // For end2end YOLOX with built-in NMS:
    // Output 0: [1, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
    // Output 1: [1, num_dets] or [1, 1] with count
    const detOutput = outputs[0];
    const detShape = detOutput.dims; // [1, num_dets, 5]

    console.log(`YOLOX: detShape=[${detShape}], ratio=${ratio}`);

    const isEnd2EndLayout =
      detShape.length === 3 && detShape[2] === 5 && detOutput.type === 'float32';
    if (!isEnd2EndLayout) {
      return [];
    }

    const detArray = detOutput.data as Float32Array;
    const numDets = detShape[1];
    const boxes: BBox[] = [];

    console.log(`YOLOX: Raw detections (first 5):`);
    for (let i = 0; i < Math.min(5, numDets); i++) {
      const baseIdx = i * 5;
      const x1 = detArray[baseIdx];
      const y1 = detArray[baseIdx + 1];
      const x2 = detArray[baseIdx + 2];
      const y2 = detArray[baseIdx + 3];
      const score = detArray[baseIdx + 4];
      console.log(` [${i}] raw=[${x1.toFixed(2)}, ${y1.toFixed(2)}, ${x2.toFixed(2)}, ${y2.toFixed(2)}] score=${score.toFixed(4)}`);
    }

    for (let i = 0; i < numDets; i++) {
      const baseIdx = i * 5;
      // Undo the letterbox scale so coordinates refer to the original image.
      const x1 = detArray[baseIdx] / ratio;
      const y1 = detArray[baseIdx + 1] / ratio;
      const x2 = detArray[baseIdx + 2] / ratio;
      const y2 = detArray[baseIdx + 3] / ratio;
      const score = detArray[baseIdx + 4];

      // Honour the configured threshold (default 0.3, matching the Python reference)
      // instead of a hard-coded constant, so `scoreThr` actually takes effect.
      if (score > this.scoreThr && x2 > x1 && y2 > y1) {
        boxes.push({ x1, y1, x2, y2 });
        console.log(` [${i}] ACCEPTED: [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}] score=${score.toFixed(3)}`);
      } else if (score > 0.1) {
        console.log(` [${i}] rejected (score=${score.toFixed(3)}): [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}]`);
      }
    }

    console.log(`YOLOX: Found ${boxes.length} boxes`);
    return boxes;
  }

  /**
   * Letterboxes the image into the model input size and converts it to a
   * normalised BGR CHW float tensor.
   *
   * When the image already matches the model size it is used as-is with
   * `ratio = 1`; otherwise it is nearest-neighbour resized (aspect ratio
   * preserved) into the top-left of a canvas pre-filled with 114.
   *
   * @returns `paddedImg` — `3 * inputH * inputW` floats in `[0, 1]`, channel
   *          order B, G, R; `ratio` — the scale applied to the original image.
   */
  private preprocess(
    img: Uint8Array,
    imgWidth: number,
    imgHeight: number
  ): { paddedImg: Float32Array; ratio: number } {
    const [inputH, inputW] = this.modelInputSize;

    let paddedImg: Uint8Array;
    let ratio: number;

    if (imgHeight === inputH && imgWidth === inputW) {
      paddedImg = img;
      ratio = 1.0;
    } else {
      paddedImg = new Uint8Array(inputH * inputW * 3).fill(114);

      ratio = Math.min(inputH / imgHeight, inputW / imgWidth);
      const resizedW = Math.floor(imgWidth * ratio);
      const resizedH = Math.floor(imgHeight * ratio);

      // Resize image (simple nearest neighbor for now)
      for (let y = 0; y < resizedH; y++) {
        const srcRow = Math.floor(y / ratio) * imgWidth;
        for (let x = 0; x < resizedW; x++) {
          const srcBase = (srcRow + Math.floor(x / ratio)) * 3;
          const dstBase = (y * inputW + x) * 3;
          paddedImg[dstBase] = img[srcBase];
          paddedImg[dstBase + 1] = img[srcBase + 1];
          paddedImg[dstBase + 2] = img[srcBase + 2];
        }
      }
    }

    // YOLOX uses simple normalization to [0, 1].
    // Single pass: scale to [0, 1], swap RGB -> BGR (OpenCV order) and
    // transpose HWC -> CHW, writing straight into the output tensor.
    const planeSize = inputH * inputW;
    const transposed = new Float32Array(planeSize * 3);
    for (let p = 0; p < planeSize; p++) {
      const src = p * 3;
      transposed[p] = paddedImg[src + 2] / 255.0; // B
      transposed[planeSize + p] = paddedImg[src + 1] / 255.0; // G
      transposed[2 * planeSize + p] = paddedImg[src] / 255.0; // R
    }

    console.log(`YOLOX preprocess: input ${imgWidth}x${imgHeight} -> ${inputW}x${inputH}, ratio=${ratio} (BGR)`);

    return { paddedImg: transposed, ratio };
  }

  /**
   * Decodes a `[1, num_boxes, 5]` or `[1, num_boxes, 6+]` detection tensor
   * into boxes in original-image coordinates.
   *
   * Rows are `[x1, y1, x2, y2, score, (classId)]`. Rows scoring below
   * `scoreThr`, rows whose class id is not 0 (when a class column exists),
   * degenerate boxes, and boxes lying entirely outside the model canvas are
   * dropped. Remaining boxes are clamped to the canvas.
   *
   * @param outputs Tensor-like object exposing `data` and `dims`.
   * @param ratio Scale factor returned by `preprocess`.
   */
  private postprocess(
    outputs: { data: ArrayLike<number>; dims: readonly number[] },
    ratio: number
  ): BBox[] {
    const outputArray = new Float32Array(outputs.data);
    const outputShape = outputs.dims;

    console.log(`YOLOX output shape: [${outputShape}], ratio: ${ratio}`);
    console.log(`First 20 values: ${Array.from(outputArray.slice(0, 20)).map(v => v.toFixed(4)).join(', ')}`);

    // outputShape: [1, num_boxes, 5] or [1, num_boxes, 6]
    // For YOLOX with NMS: [batch, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
    if (outputShape.length !== 3 || outputShape[2] < 5) {
      return [];
    }

    const numBoxes = outputShape[1];
    const stride = outputShape[2];
    const hasClassInfo = stride >= 6;
    const boxes: BBox[] = [];

    // Extent of the model canvas expressed in original-image coordinates.
    const [inputH, inputW] = this.modelInputSize;
    const maxX = inputW / ratio;
    const maxY = inputH / ratio;

    console.log(`Processing ${numBoxes} boxes, hasClassInfo: ${hasClassInfo}`);

    for (let i = 0; i < numBoxes; i++) {
      const baseIdx = i * stride;
      const score = outputArray[baseIdx + 4];

      // Filter by score threshold
      if (score < this.scoreThr) continue;

      // Check class if available
      if (hasClassInfo) {
        const classId = outputArray[baseIdx + 5];
        if (classId !== 0) continue; // Only person class
      }

      const x1 = outputArray[baseIdx] / ratio;
      const y1 = outputArray[baseIdx + 1] / ratio;
      const x2 = outputArray[baseIdx + 2] / ratio;
      const y2 = outputArray[baseIdx + 3] / ratio;

      // Validate box coordinates
      if (x1 >= x2 || y1 >= y2) continue;
      if (x2 < 0 || y2 < 0 || x1 > maxX || y1 > maxY) continue;

      console.log(`Found box: [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}] score: ${score.toFixed(3)}`);

      // Clamp to the canvas. The upper bounds must come from the model input
      // size, not from the tensor shape (dims[0] is the batch size and
      // dims[1] the detection count).
      boxes.push({
        x1: Math.max(0, x1),
        y1: Math.max(0, y1),
        x2: Math.min(maxX, x2),
        y2: Math.min(maxY, y2),
      });
    }

    console.log(`Total boxes found: ${boxes.length}`);
    return boxes;
  }
}