rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,606 @@
1
+ /**
2
+ * ObjectDetector - Universal object detection API
3
+ * Supports YOLO12 and other YOLO models for multi-class detection
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * // Initialize with default model (YOLOv12n from HuggingFace)
8
+ * const detector = new ObjectDetector({
9
+ * classes: ['person', 'car', 'dog'], // Filter specific classes
10
+ * });
11
+ * await detector.init();
12
+ *
13
+ * // Or with custom model
14
+ * const detector = new ObjectDetector({
15
+ * model: 'models/yolov12n.onnx',
16
+ * classes: ['person'],
17
+ * });
18
+ * await detector.init();
19
+ *
20
+ * // Detect from canvas
21
+ * const objects = await detector.detectFromCanvas(canvas);
22
+ *
23
+ * // Detect all classes
24
+ * const allObjects = await detector.detectFromCanvas(canvas, { classes: null });
25
+ * ```
26
+ */
27
import * as ort from 'onnxruntime-web';
import { getCachedModel, isModelCached } from '../core/modelCache';

// Global ONNX Runtime Web setup, applied once at module load:
// enable SIMD kernels, run inference on the calling thread (no proxy
// worker), and fetch the .wasm binaries from the CDN build pinned to
// the 1.23.0 runtime version.
ort.env.wasm.simd = true;
ort.env.wasm.proxy = false;
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
33
/**
 * The 80 COCO object-class names, indexed by model class id.
 * Index order is significant: `updateClassFilter` maps names to ids via
 * `indexOf`, and `postprocess` maps ids back to names by index.
 */
export const COCO_CLASSES = [
  'person', 'bicycle', 'car', 'motorcycle', 'airplane',
  'bus', 'train', 'truck', 'boat', 'traffic light',
  'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
  'cat', 'dog', 'horse', 'sheep', 'cow',
  'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
  'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
  'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
  'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
  'wine glass', 'cup', 'fork', 'knife', 'spoon',
  'bowl', 'banana', 'apple', 'sandwich', 'orange',
  'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
  'cake', 'chair', 'couch', 'potted plant', 'bed',
  'dining table', 'toilet', 'tv', 'laptop', 'mouse',
  'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
  'toaster', 'sink', 'refrigerator', 'book', 'clock',
  'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
];
48
/**
 * Default detector configuration.
 * `inputSize` is [width, height]. The 416x416 default matches the
 * "balanced" preset; see MODE_PRESETS for the other tradeoffs.
 */
const DEFAULT_CONFIG = {
  // Default YOLOv12n weights hosted on HuggingFace.
  model: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
  inputSize: [416, 416],
  confidence: 0.5,
  nmsThreshold: 0.45,
  classes: ['person'],
  backend: 'webgpu',
  mode: 'balanced',
  device: 'cpu',
  cache: true,
};
62
// Performance presets keyed by `config.mode`. A preset supplies
// inputSize ([width, height]) and confidence, which the constructor
// applies only when the caller did not set those options explicitly.
const MODE_PRESETS = {
  performance: { inputSize: [640, 640], confidence: 0.3 }, // highest accuracy
  balanced: { inputSize: [416, 416], confidence: 0.5 },    // default tradeoff
  lightweight: { inputSize: [320, 320], confidence: 0.6 }, // fastest
};
68
export class ObjectDetector {
  /**
   * Universal YOLO-style object detector backed by onnxruntime-web.
   *
   * @param {object} [config={}] - Options merged over DEFAULT_CONFIG:
   *   `model` (URL or path to an ONNX file), `inputSize` ([width, height]),
   *   `confidence` (score threshold), `nmsThreshold` (IoU threshold),
   *   `classes` (array of COCO class names, or null for all classes),
   *   `backend` (ORT execution provider name, e.g. 'webgpu' or 'wasm'),
   *   `mode` ('performance' | 'balanced' | 'lightweight'),
   *   `cache` (use the model cache from ../core/modelCache).
   */
  constructor(config = {}) {
    this.session = null;
    this.initialized = false;
    this.classFilter = null;
    this.lastStats = null; // stats of the most recent detect() call
    // Pre-allocated reusable resources for performance.
    this.canvas = null; // letterbox target canvas (model input size)
    this.ctx = null;
    this.srcCanvas = null; // scratch canvas holding raw source pixels
    this.srcCtx = null;
    this.tensorBuffer = null;
    this.inputSize = [416, 416]; // [width, height]

    const finalConfig = { ...DEFAULT_CONFIG, ...config };
    // Apply the mode preset, but only for options the caller did not set
    // explicitly. Compare against undefined (not truthiness) so an
    // explicit `confidence: 0` is honored instead of being clobbered.
    if (config.mode && MODE_PRESETS[config.mode]) {
      const preset = MODE_PRESETS[config.mode];
      if (config.inputSize === undefined) {
        finalConfig.inputSize = preset.inputSize;
      }
      if (config.confidence === undefined) {
        finalConfig.confidence = preset.confidence;
      }
    }
    this.config = finalConfig;
    this.updateClassFilter();
    console.log(`[ObjectDetector] Initialized with mode: ${config.mode || 'balanced'}, input: ${this.config.inputSize[0]}x${this.config.inputSize[1]}`);
  }

  /**
   * Rebuild the numeric class-id filter from `config.classes`.
   * A null/undefined class list disables filtering (all classes pass).
   */
  updateClassFilter() {
    if (!this.config.classes) {
      this.classFilter = null;
      return;
    }
    this.classFilter = new Set();
    for (const className of this.config.classes) {
      const classId = COCO_CLASSES.indexOf(className.toLowerCase());
      if (classId === -1) {
        console.warn(`[ObjectDetector] Unknown class: ${className}`);
      } else {
        this.classFilter.add(classId);
      }
    }
  }

  /**
   * Set which classes to detect.
   * @param classes - Array of class names or null for all classes
   */
  setClasses(classes) {
    this.config.classes = classes;
    this.updateClassFilter();
  }

  /** Get list of available COCO classes (defensive copy). */
  getAvailableClasses() {
    return [...COCO_CLASSES];
  }

  /** Get currently filtered classes (null means "all classes"). */
  getFilteredClasses() {
    return this.config.classes;
  }

  /**
   * Load the model (optionally via the model cache), create the inference
   * session, and pre-allocate the letterbox canvas and tensor buffer.
   * Idempotent: repeated calls after a successful init are no-ops.
   * @throws if the model cannot be fetched or session creation fails.
   */
  async init() {
    if (this.initialized) return;
    try {
      console.log(`[ObjectDetector] Loading model from: ${this.config.model}`);
      let modelBuffer;
      if (this.config.cache) {
        const isCached = await isModelCached(this.config.model);
        console.log(`[ObjectDetector] Cache ${isCached ? 'hit' : 'miss'} for model`);
        modelBuffer = await getCachedModel(this.config.model);
      } else {
        console.log(`[ObjectDetector] Caching disabled, fetching from network`);
        const response = await fetch(this.config.model);
        if (!response.ok) {
          throw new Error(`Failed to fetch model: HTTP ${response.status} ${response.statusText}`);
        }
        modelBuffer = await response.arrayBuffer();
      }
      console.log(`[ObjectDetector] Model loaded, size: ${(modelBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
      this.session = await ort.InferenceSession.create(modelBuffer, {
        executionProviders: [this.config.backend],
        graphOptimizationLevel: 'all',
      });
      // inputSize is [width, height] everywhere in this class.
      const [w, h] = this.config.inputSize;
      this.inputSize = [w, h];
      this.canvas = document.createElement('canvas');
      this.canvas.width = w;
      this.canvas.height = h;
      this.ctx = this.canvas.getContext('2d', {
        willReadFrequently: true,
        alpha: false, // no transparency needed; faster readback
      });
      // 3 channels (RGB) * width * height, reused on every frame.
      this.tensorBuffer = new Float32Array(3 * w * h);
      this.initialized = true;
      console.log(`[ObjectDetector] ✅ Initialized (${w}x${h}, ${this.config.backend})`);
    } catch (error) {
      console.error('[ObjectDetector] ❌ Initialization failed:', error);
      throw error;
    }
  }

  /** Read RGBA pixels from a 2D context region and run detection on them. */
  async detectFromRegion(ctx, width, height) {
    const imageData = ctx.getImageData(0, 0, width, height);
    return this.detect(new Uint8Array(imageData.data.buffer), width, height);
  }

  /**
   * Detect objects from HTMLCanvasElement
   */
  async detectFromCanvas(canvas) {
    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('Could not get 2D context from canvas');
    }
    return this.detectFromRegion(ctx, canvas.width, canvas.height);
  }

  /**
   * Detect objects from HTMLVideoElement (current frame).
   * @throws if the video has not buffered the current frame yet.
   */
  async detectFromVideo(video, targetCanvas) {
    if (video.readyState < 2) {
      throw new Error('Video not ready. Ensure video is loaded and playing.');
    }
    const canvas = targetCanvas || document.createElement('canvas');
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;
    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('Could not get 2D context from canvas');
    }
    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
    return this.detectFromRegion(ctx, canvas.width, canvas.height);
  }

  /**
   * Detect objects from a fully loaded HTMLImageElement.
   */
  async detectFromImage(image, targetCanvas) {
    if (!image.complete || !image.naturalWidth) {
      throw new Error('Image not loaded. Ensure image is fully loaded.');
    }
    const canvas = targetCanvas || document.createElement('canvas');
    canvas.width = image.naturalWidth;
    canvas.height = image.naturalHeight;
    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('Could not get 2D context from canvas');
    }
    ctx.drawImage(image, 0, 0);
    return this.detectFromRegion(ctx, canvas.width, canvas.height);
  }

  /**
   * Detect objects from ImageBitmap
   */
  async detectFromBitmap(bitmap, targetCanvas) {
    const canvas = targetCanvas || document.createElement('canvas');
    canvas.width = bitmap.width;
    canvas.height = bitmap.height;
    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('Could not get 2D context from canvas');
    }
    ctx.drawImage(bitmap, 0, 0);
    return this.detectFromRegion(ctx, canvas.width, canvas.height);
  }

  /**
   * Detect objects from a File containing an image.
   * The temporary object URL is revoked on both the load and error paths
   * (the previous implementation leaked one URL per call).
   */
  async detectFromFile(file, targetCanvas) {
    return new Promise((resolve, reject) => {
      const img = new Image();
      const url = URL.createObjectURL(file);
      img.onload = async () => {
        URL.revokeObjectURL(url);
        try {
          resolve(await this.detectFromImage(img, targetCanvas));
        } catch (error) {
          reject(error);
        }
      };
      img.onerror = () => {
        URL.revokeObjectURL(url);
        reject(new Error('Failed to load image from file'));
      };
      img.src = url;
    });
  }

  /**
   * Detect objects from a Blob (decoded via createImageBitmap).
   */
  async detectFromBlob(blob, targetCanvas) {
    const bitmap = await createImageBitmap(blob);
    try {
      return await this.detectFromBitmap(bitmap, targetCanvas);
    } finally {
      bitmap.close(); // release decoder memory even if detection throws
    }
  }

  /**
   * Detect objects from raw RGBA pixel data.
   * Lazily initializes the session on first call.
   *
   * @param {Uint8Array} imageData - RGBA bytes, length = width*height*4.
   * @param {number} width - Source image width in pixels.
   * @param {number} height - Source image height in pixels.
   * @returns detections array with a `stats` summary property attached.
   */
  async detect(imageData, width, height) {
    if (!this.initialized) {
      await this.init();
    }
    const startTime = performance.now();
    // inputSize is [width, height]. (The previous code destructured it as
    // [h, w] here while init() read [w, h] — harmless for the square
    // defaults but wrong for rectangular input sizes.)
    const [inputW, inputH] = this.config.inputSize;
    const { tensor, paddingX, paddingY, scaleX, scaleY } = this.preprocess(imageData, width, height, [inputW, inputH]);
    // ONNX NCHW layout: [batch, channels, height, width].
    const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
    // Input name varies per export ('images', 'pixel_values', ...).
    const inputName = this.session.inputNames[0];
    const feeds = { [inputName]: inputTensor };
    const results = await this.session.run(feeds);
    const output = results[this.session.outputNames[0]];
    const detections = this.postprocess(output.data, output.dims[1], output.dims, width, height, paddingX, paddingY, scaleX, scaleY);
    const inferenceTime = performance.now() - startTime;
    detections.stats = this.calculateStats(detections, inferenceTime);
    this.lastStats = detections.stats; // exposed via getStats()
    return detections;
  }

  /**
   * Letterbox the source pixels into the model input canvas and normalize
   * to a [0, 1] CHW float tensor, reusing pre-allocated buffers.
   *
   * @returns {{tensor: Float32Array, paddingX: number, paddingY: number,
   *   scaleX: number, scaleY: number}} the tensor plus the letterbox
   *   offsets (input-space px) and input-to-image scale factors needed to
   *   map boxes back to the original image.
   */
  preprocess(imageData, imgWidth, imgHeight, inputSize) {
    const [inputW, inputH] = inputSize;
    // Lazily create target resources if init() has not run yet.
    if (!this.canvas || !this.ctx) {
      this.canvas = document.createElement('canvas');
      this.canvas.width = inputW;
      this.canvas.height = inputH;
      this.ctx = this.canvas.getContext('2d', {
        willReadFrequently: true,
        alpha: false,
      });
      this.tensorBuffer = new Float32Array(3 * inputW * inputH);
    }
    const ctx = this.ctx;
    ctx.clearRect(0, 0, inputW, inputH);
    // Letterbox: scale to fit while preserving aspect ratio, centered.
    const aspectRatio = imgWidth / imgHeight;
    const targetAspectRatio = inputW / inputH;
    let drawWidth;
    let drawHeight;
    let offsetX;
    let offsetY;
    if (aspectRatio > targetAspectRatio) {
      drawWidth = inputW;
      drawHeight = (inputW / aspectRatio) | 0; // truncation == floor for positives
      offsetX = 0;
      offsetY = ((inputH - drawHeight) / 2) | 0;
    } else {
      drawHeight = inputH;
      drawWidth = (inputH * aspectRatio) | 0;
      offsetX = ((inputW - drawWidth) / 2) | 0;
      offsetY = 0;
    }
    // Stage raw pixels on a scratch canvas. Reuse it across frames with
    // the same source dimensions instead of reallocating every call.
    if (!this.srcCanvas || this.srcCanvas.width !== imgWidth || this.srcCanvas.height !== imgHeight) {
      this.srcCanvas = document.createElement('canvas');
      this.srcCanvas.width = imgWidth;
      this.srcCanvas.height = imgHeight;
      this.srcCtx = this.srcCanvas.getContext('2d');
    }
    const srcImageData = this.srcCtx.createImageData(imgWidth, imgHeight);
    srcImageData.data.set(imageData);
    this.srcCtx.putImageData(srcImageData, 0, 0);
    // Draw with letterbox.
    ctx.drawImage(this.srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);
    const paddedData = ctx.getImageData(0, 0, inputW, inputH);
    // RGBA interleaved -> planar RGB, normalized to [0, 1].
    // (One pixel per iteration; no longer assumes the pixel count is a
    // multiple of 4 like the old unrolled loop did.)
    const tensor = this.tensorBuffer;
    const data = paddedData.data;
    const planeSize = inputW * inputH;
    const inv255 = 1 / 255;
    for (let p = 0, i = 0; p < planeSize; p++, i += 4) {
      tensor[p] = data[i] * inv255; // R plane
      tensor[p + planeSize] = data[i + 1] * inv255; // G plane
      tensor[p + 2 * planeSize] = data[i + 2] * inv255; // B plane
    }
    return {
      tensor,
      paddingX: offsetX,
      paddingY: offsetY,
      scaleX: imgWidth / drawWidth,
      scaleY: imgHeight / drawHeight,
    };
  }

  /**
   * Map a box from letterboxed input space back to original-image pixels,
   * clamp to the image bounds, and package a detection record.
   */
  buildDetection(x1, y1, x2, y2, confidence, classId, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY) {
    return {
      bbox: {
        x1: Math.max(0, (x1 - paddingX) * scaleX),
        y1: Math.max(0, (y1 - paddingY) * scaleY),
        x2: Math.min(imgWidth, (x2 - paddingX) * scaleX),
        y2: Math.min(imgHeight, (y2 - paddingY) * scaleY),
        confidence,
      },
      classId,
      className: COCO_CLASSES[classId] || `class_${classId}`,
      confidence,
    };
  }

  /**
   * Decode raw model output into confidence/class-filtered detections in
   * original-image space, then apply per-class NMS. Supported layouts:
   *   1. [batch, boxes, 6]   -> [x1, y1, x2, y2, conf, classId]
   *   2. [batch, boxes, 4+C] -> [C class logits..., 4 box values]
   * (Per-frame debug logging was removed from this hot path.)
   */
  postprocess(output, numDetections, outputShape, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY) {
    const detections = [];
    const sigmoid = (v) => 1 / (1 + Math.exp(-v));
    if (outputShape.length === 3 && outputShape[2] === 6) {
      // Format 1: boxes already decoded and scored.
      for (let i = 0; i < numDetections; i++) {
        const idx = i * 6;
        const x1 = output[idx];
        const y1 = output[idx + 1];
        const x2 = output[idx + 2];
        const y2 = output[idx + 3];
        const confidence = output[idx + 4];
        const classId = Math.round(output[idx + 5]);
        if (confidence < this.config.confidence) continue;
        if (this.classFilter && !this.classFilter.has(classId)) continue;
        if (x2 <= x1 || y2 <= y1) continue;
        detections.push(this.buildDetection(x1, y1, x2, y2, confidence, classId, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY));
      }
    } else if (outputShape.length === 3 && outputShape[2] >= 80) {
      // Format 2: per-box class logits followed by 4 box values.
      const stride = outputShape[2];
      const numClasses = stride - 4;
      const [inputW, inputH] = this.config.inputSize; // [width, height]
      for (let i = 0; i < numDetections; i++) {
        const baseIdx = i * stride;
        let x1 = output[baseIdx + numClasses];
        let y1 = output[baseIdx + numClasses + 1];
        let x2 = output[baseIdx + numClasses + 2];
        let y2 = output[baseIdx + numClasses + 3];
        // Heuristic decode: values in (-1, 1) or negative look like
        // logits, so squash with sigmoid and scale to the input size;
        // otherwise assume the export already emits input-space pixels.
        if ((Math.abs(x1) < 1 && Math.abs(y1) < 1) || x1 < 0 || y1 < 0) {
          x1 = sigmoid(x1) * inputW;
          y1 = sigmoid(y1) * inputH;
          x2 = sigmoid(x2) * inputW;
          y2 = sigmoid(y2) * inputH;
        }
        // Argmax over the class logits.
        let bestClass = 0;
        let bestScore = -Infinity;
        for (let c = 0; c < numClasses; c++) {
          const score = output[baseIdx + c];
          if (score > bestScore) {
            bestScore = score;
            bestClass = c;
          }
        }
        const confidence = sigmoid(bestScore);
        if (confidence < this.config.confidence) continue;
        if (this.classFilter && !this.classFilter.has(bestClass)) continue;
        if (x2 <= x1 || y2 <= y1) continue;
        if ((x1 < 0 && x2 < 0) || (y1 < 0 && y2 < 0)) continue;
        detections.push(this.buildDetection(x1, y1, x2, y2, confidence, bestClass, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY));
      }
    }
    return this.applyMultiClassNMS(detections, this.config.nmsThreshold);
  }

  /**
   * Per-class greedy Non-Maximum Suppression.
   * @param detections - Candidate detections (any order).
   * @param iouThreshold - Boxes overlapping a kept box above this IoU are dropped.
   */
  applyMultiClassNMS(detections, iouThreshold) {
    if (detections.length === 0) return [];
    // Group by class so different classes never suppress each other.
    const byClass = new Map();
    for (const det of detections) {
      const bucket = byClass.get(det.classId);
      if (bucket) {
        bucket.push(det);
      } else {
        byClass.set(det.classId, [det]);
      }
    }
    const selected = [];
    byClass.forEach((classDets) => {
      // Greedy: keep the highest-confidence box, suppress overlapping neighbors.
      classDets.sort((a, b) => b.confidence - a.confidence);
      const suppressed = new Set();
      for (let i = 0; i < classDets.length; i++) {
        if (suppressed.has(i)) continue;
        selected.push(classDets[i]);
        for (let j = i + 1; j < classDets.length; j++) {
          if (suppressed.has(j)) continue;
          if (this.calculateIoU(classDets[i].bbox, classDets[j].bbox) > iouThreshold) {
            suppressed.add(j);
          }
        }
      }
    });
    return selected;
  }

  /** Intersection-over-Union of two {x1, y1, x2, y2} boxes (0 when disjoint). */
  calculateIoU(box1, box2) {
    const x1 = Math.max(box1.x1, box2.x1);
    const y1 = Math.max(box1.y1, box2.y1);
    const x2 = Math.min(box1.x2, box2.x2);
    const y2 = Math.min(box1.y2, box2.y2);
    if (x2 <= x1 || y2 <= y1) return 0;
    const intersection = (x2 - x1) * (y2 - y1);
    const area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
    const area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
    return intersection / (area1 + area2 - intersection);
  }

  /** Summarize a detection run: total count, per-class counts, rounded ms. */
  calculateStats(detections, inferenceTime) {
    const classCounts = {};
    for (const det of detections) {
      classCounts[det.className] = (classCounts[det.className] || 0) + 1;
    }
    return {
      totalCount: detections.length,
      classCounts,
      inferenceTime: Math.round(inferenceTime),
    };
  }

  /**
   * Statistics from the most recent detect() call, or null before the
   * first run. (Previously a stub that always returned null.)
   */
  getStats() {
    return this.lastStats;
  }

  /** Release the inference session and drop pre-allocated buffers. */
  dispose() {
    if (this.session) {
      this.session.release();
      this.session = null;
    }
    this.canvas = null;
    this.ctx = null;
    this.srcCanvas = null;
    this.srcCtx = null;
    this.tensorBuffer = null;
    this.lastStats = null;
    this.initialized = false;
  }
}