rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,746 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>YOLO12 Person Detection</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ body {
15
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif;
16
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
17
+ min-height: 100vh;
18
+ color: #fff;
19
+ padding: 20px;
20
+ }
21
+
22
+ .container {
23
+ max-width: 1200px;
24
+ margin: 0 auto;
25
+ }
26
+
27
+ header {
28
+ text-align: center;
29
+ margin-bottom: 30px;
30
+ }
31
+
32
+ header h1 {
33
+ font-size: 2.5rem;
34
+ margin-bottom: 10px;
35
+ background: linear-gradient(90deg, #00d9ff, #00ff88);
36
+ -webkit-background-clip: text;
37
+ -webkit-text-fill-color: transparent;
38
+ }
39
+
40
+ header p {
41
+ color: #8892b0;
42
+ font-size: 1.1rem;
43
+ }
44
+
45
+ .main-content {
46
+ display: grid;
47
+ grid-template-columns: 1fr 300px;
48
+ gap: 20px;
49
+ }
50
+
51
+ @media (max-width: 768px) {
52
+ .main-content {
53
+ grid-template-columns: 1fr;
54
+ }
55
+ }
56
+
57
+ .canvas-container {
58
+ background: rgba(255, 255, 255, 0.05);
59
+ border-radius: 16px;
60
+ padding: 20px;
61
+ backdrop-filter: blur(10px);
62
+ border: 1px solid rgba(255, 255, 255, 0.1);
63
+ }
64
+
65
+ .canvas-wrapper {
66
+ position: relative;
67
+ width: 100%;
68
+ min-height: 400px;
69
+ display: flex;
70
+ align-items: center;
71
+ justify-content: center;
72
+ background: rgba(0, 0, 0, 0.3);
73
+ border-radius: 12px;
74
+ overflow: hidden;
75
+ }
76
+
77
+ canvas {
78
+ max-width: 100%;
79
+ max-height: 600px;
80
+ display: block;
81
+ }
82
+
83
+ .placeholder {
84
+ position: absolute;
85
+ text-align: center;
86
+ color: #8892b0;
87
+ }
88
+
89
+ .placeholder svg {
90
+ width: 64px;
91
+ height: 64px;
92
+ margin-bottom: 16px;
93
+ opacity: 0.5;
94
+ }
95
+
96
+ .sidebar {
97
+ display: flex;
98
+ flex-direction: column;
99
+ gap: 20px;
100
+ }
101
+
102
+ .panel {
103
+ background: rgba(255, 255, 255, 0.05);
104
+ border-radius: 16px;
105
+ padding: 20px;
106
+ backdrop-filter: blur(10px);
107
+ border: 1px solid rgba(255, 255, 255, 0.1);
108
+ }
109
+
110
+ .panel h2 {
111
+ font-size: 1.2rem;
112
+ margin-bottom: 15px;
113
+ color: #00d9ff;
114
+ }
115
+
116
+ .upload-area {
117
+ border: 2px dashed rgba(0, 217, 255, 0.3);
118
+ border-radius: 12px;
119
+ padding: 30px 20px;
120
+ text-align: center;
121
+ cursor: pointer;
122
+ transition: all 0.3s ease;
123
+ margin-bottom: 15px;
124
+ }
125
+
126
+ .upload-area:hover,
127
+ .upload-area.dragover {
128
+ border-color: #00d9ff;
129
+ background: rgba(0, 217, 255, 0.1);
130
+ }
131
+
132
+ .upload-area svg {
133
+ width: 48px;
134
+ height: 48px;
135
+ margin-bottom: 12px;
136
+ color: #00d9ff;
137
+ }
138
+
139
+ .upload-area p {
140
+ color: #8892b0;
141
+ font-size: 0.9rem;
142
+ }
143
+
144
+ #fileInput {
145
+ display: none;
146
+ }
147
+
148
+ .btn {
149
+ width: 100%;
150
+ padding: 12px 24px;
151
+ border: none;
152
+ border-radius: 8px;
153
+ font-size: 1rem;
154
+ font-weight: 600;
155
+ cursor: pointer;
156
+ transition: all 0.3s ease;
157
+ margin-bottom: 10px;
158
+ }
159
+
160
+ .btn-primary {
161
+ background: linear-gradient(90deg, #00d9ff, #00ff88);
162
+ color: #1a1a2e;
163
+ }
164
+
165
+ .btn-primary:hover {
166
+ transform: translateY(-2px);
167
+ box-shadow: 0 10px 30px rgba(0, 217, 255, 0.3);
168
+ }
169
+
170
+ .btn-primary:disabled {
171
+ opacity: 0.5;
172
+ cursor: not-allowed;
173
+ transform: none;
174
+ }
175
+
176
+ .btn-secondary {
177
+ background: rgba(255, 255, 255, 0.1);
178
+ color: #fff;
179
+ border: 1px solid rgba(255, 255, 255, 0.2);
180
+ }
181
+
182
+ .btn-secondary:hover {
183
+ background: rgba(255, 255, 255, 0.2);
184
+ }
185
+
186
+ .stats {
187
+ display: grid;
188
+ grid-template-columns: 1fr 1fr;
189
+ gap: 10px;
190
+ }
191
+
192
+ .stat-item {
193
+ background: rgba(0, 0, 0, 0.3);
194
+ padding: 15px;
195
+ border-radius: 8px;
196
+ text-align: center;
197
+ }
198
+
199
+ .stat-value {
200
+ font-size: 1.8rem;
201
+ font-weight: 700;
202
+ color: #00ff88;
203
+ }
204
+
205
+ .stat-label {
206
+ font-size: 0.8rem;
207
+ color: #8892b0;
208
+ margin-top: 5px;
209
+ }
210
+
211
+ .detections-list {
212
+ max-height: 300px;
213
+ overflow-y: auto;
214
+ }
215
+
216
+ .detection-item {
217
+ background: rgba(0, 0, 0, 0.3);
218
+ padding: 12px;
219
+ border-radius: 8px;
220
+ margin-bottom: 10px;
221
+ border-left: 3px solid #00ff88;
222
+ }
223
+
224
+ .detection-item .class-name {
225
+ font-weight: 600;
226
+ color: #00d9ff;
227
+ }
228
+
229
+ .detection-item .confidence {
230
+ font-size: 0.85rem;
231
+ color: #8892b0;
232
+ }
233
+
234
+ .detection-item .bbox {
235
+ font-size: 0.75rem;
236
+ color: #666;
237
+ margin-top: 5px;
238
+ font-family: monospace;
239
+ }
240
+
241
+ .status {
242
+ padding: 10px;
243
+ border-radius: 8px;
244
+ margin-bottom: 15px;
245
+ font-size: 0.9rem;
246
+ }
247
+
248
+ .status.loading {
249
+ background: rgba(0, 217, 255, 0.1);
250
+ color: #00d9ff;
251
+ }
252
+
253
+ .status.ready {
254
+ background: rgba(0, 255, 136, 0.1);
255
+ color: #00ff88;
256
+ }
257
+
258
+ .status.error {
259
+ background: rgba(255, 0, 0, 0.1);
260
+ color: #ff4444;
261
+ }
262
+
263
+ .spinner {
264
+ display: inline-block;
265
+ width: 16px;
266
+ height: 16px;
267
+ border: 2px solid rgba(0, 217, 255, 0.3);
268
+ border-top-color: #00d9ff;
269
+ border-radius: 50%;
270
+ animation: spin 1s linear infinite;
271
+ margin-right: 8px;
272
+ }
273
+
274
+ @keyframes spin {
275
+ to { transform: rotate(360deg); }
276
+ }
277
+
278
+ .settings {
279
+ margin-top: 15px;
280
+ }
281
+
282
+ .setting-item {
283
+ margin-bottom: 15px;
284
+ }
285
+
286
+ .setting-item label {
287
+ display: block;
288
+ margin-bottom: 5px;
289
+ color: #8892b0;
290
+ font-size: 0.9rem;
291
+ }
292
+
293
+ .setting-item input[type="range"] {
294
+ width: 100%;
295
+ height: 6px;
296
+ border-radius: 3px;
297
+ background: rgba(255, 255, 255, 0.1);
298
+ outline: none;
299
+ -webkit-appearance: none;
300
+ }
301
+
302
+ .setting-item input[type="range"]::-webkit-slider-thumb {
303
+ -webkit-appearance: none;
304
+ width: 18px;
305
+ height: 18px;
306
+ border-radius: 50%;
307
+ background: #00d9ff;
308
+ cursor: pointer;
309
+ }
310
+
311
+ .setting-value {
312
+ text-align: right;
313
+ font-size: 0.85rem;
314
+ color: #00d9ff;
315
+ }
316
+ </style>
317
+ </head>
318
+ <body>
319
+ <div class="container">
320
+ <header>
321
+ <h1>🎯 YOLO12 Person Detection</h1>
322
+ <p>Real-time person detection powered by ONNX Runtime Web</p>
323
+ </header>
324
+
325
+ <div class="main-content">
326
+ <div class="canvas-container">
327
+ <div class="canvas-wrapper">
328
+ <canvas id="canvas"></canvas>
329
+ <div class="placeholder" id="placeholder">
330
+ <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
331
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z"></path>
332
+ </svg>
333
+ <p>Upload an image to detect people</p>
334
+ </div>
335
+ </div>
336
+ </div>
337
+
338
+ <div class="sidebar">
339
+ <div class="panel">
340
+ <h2>📁 Upload Image</h2>
341
+ <div class="upload-area" id="uploadArea">
342
+ <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
343
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M15 13l-3-3m0 0l-3 3m3-3v12"></path>
344
+ </svg>
345
+ <p>Drag & drop or click to upload</p>
346
+ </div>
347
+ <input type="file" id="fileInput" accept="image/*">
348
+
349
+ <div id="status" class="status loading">
350
+ <span class="spinner"></span>
351
+ Loading model...
352
+ </div>
353
+
354
+ <button class="btn btn-primary" id="detectBtn" disabled>Detect People</button>
355
+ <button class="btn btn-secondary" id="clearBtn">Clear</button>
356
+ </div>
357
+
358
+ <div class="panel">
359
+ <h2>📊 Statistics</h2>
360
+ <div class="stats">
361
+ <div class="stat-item">
362
+ <div class="stat-value" id="personCount">0</div>
363
+ <div class="stat-label">People Detected</div>
364
+ </div>
365
+ <div class="stat-item">
366
+ <div class="stat-value" id="inferenceTime">0</div>
367
+ <div class="stat-label">Time (ms)</div>
368
+ </div>
369
+ </div>
370
+
371
+ <div class="settings">
372
+ <div class="setting-item">
373
+ <label>Confidence Threshold</label>
374
+ <input type="range" id="confidenceSlider" min="0.1" max="0.9" step="0.05" value="0.5">
375
+ <div class="setting-value" id="confidenceValue">0.50</div>
376
+ </div>
377
+ </div>
378
+ </div>
379
+
380
+ <div class="panel">
381
+ <h2>🎯 Detections</h2>
382
+ <div class="detections-list" id="detectionsList">
383
+ <p style="color: #8892b0; font-size: 0.9rem;">No detections yet</p>
384
+ </div>
385
+ </div>
386
+ </div>
387
+ </div>
388
+ </div>
389
+
390
+ <script type="module">
391
+ import * as ort from 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/ort.min.js';
392
+
393
+ // Configure ONNX Runtime
394
+ ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
395
+ ort.env.wasm.simd = true;
396
+
397
+ // DOM Elements
398
// DOM elements (each id is looked up once when the module is evaluated)
const byId = (id) => document.getElementById(id);

const canvas = byId('canvas');
const ctx = canvas.getContext('2d');
const placeholder = byId('placeholder');
const uploadArea = byId('uploadArea');
const fileInput = byId('fileInput');
const detectBtn = byId('detectBtn');
const clearBtn = byId('clearBtn');
const statusEl = byId('status');
const personCountEl = byId('personCount');
const inferenceTimeEl = byId('inferenceTime');
const detectionsListEl = byId('detectionsList');
const confidenceSlider = byId('confidenceSlider');
const confidenceValue = byId('confidenceValue');

// Mutable page state
let session = null;            // inference session; stays null until the model has loaded
let currentImage = null;       // the decoded image currently shown on the canvas
let currentImageData = null;   // pixel snapshot of the canvas taken right after the image is drawn
let confidenceThreshold = 0.5; // minimum score a detection needs to be kept
417
+
418
+ // Load model
419
/**
 * Download the YOLO12 ONNX model and create the inference session.
 *
 * On success the module-level `session` is set, the status banner switches
 * to "ready" and the detect button is enabled. On any failure (network
 * error, HTTP error status, or session creation error) the status banner
 * shows the error message and the detect button stays disabled.
 *
 * @returns {Promise<void>} Resolves once the UI has been updated; never rejects.
 */
async function loadModel() {
    try {
        const response = await fetch('https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx');

        // fetch() only rejects on network failure. An HTTP error (404, 5xx)
        // still resolves, and without this check the error page body would
        // be handed to ONNX Runtime as if it were a model file.
        if (!response.ok) {
            throw new Error(`Model download failed: HTTP ${response.status} ${response.statusText}`);
        }

        const modelBuffer = await response.arrayBuffer();

        session = await ort.InferenceSession.create(modelBuffer, {
            executionProviders: ['wasm'],
            graphOptimizationLevel: 'all',
        });

        statusEl.className = 'status ready';
        // textContent replaces the spinner child just like innerHTML did,
        // without ever interpreting the text as markup.
        statusEl.textContent = '✓ Model ready';
        detectBtn.disabled = false;
        console.log('Model loaded successfully');
    } catch (error) {
        statusEl.className = 'status error';
        // The message originates outside this page (network / runtime), so
        // it is inserted as plain text rather than HTML.
        statusEl.textContent = `✗ Error: ${error.message}`;
        console.error('Failed to load model:', error);
    }
}
439
+
440
+ // Initialize
441
// Start the model download as soon as the script runs. loadModel() handles
// its own errors, so the returned promise is intentionally not awaited.
loadModel();

// Image upload handlers

// Clicking the drop zone opens the (hidden) native file picker.
uploadArea.addEventListener('click', () => fileInput.click());

uploadArea.addEventListener('dragover', (e) => {
    // Without preventDefault the browser refuses the drop.
    e.preventDefault();
    uploadArea.classList.add('dragover');
});

uploadArea.addEventListener('dragleave', () => {
    uploadArea.classList.remove('dragover');
});

uploadArea.addEventListener('drop', (e) => {
    e.preventDefault();
    uploadArea.classList.remove('dragover');
    const file = e.dataTransfer.files[0];
    if (file && file.type.startsWith('image/')) {
        loadImage(file);
    }
});

fileInput.addEventListener('change', (e) => {
    const file = e.target.files[0];
    // Apply the same image-only check as the drop handler: the input's
    // `accept` attribute is only a hint and the picker can be switched to
    // "all files".
    if (file && file.type.startsWith('image/')) {
        loadImage(file);
    }
});
470
+
471
+ // Load image
472
/**
 * Read an image file, paint it on the main canvas and cache its pixels.
 *
 * The file is read as a data URL and decoded through an Image element. Once
 * decoded, the canvas is resized to the image, the placeholder is hidden and
 * `currentImage` / `currentImageData` are updated.
 *
 * @param {File} file - The file chosen or dropped by the user.
 */
function loadImage(file) {
    const picture = new Image();
    picture.onload = () => {
        currentImage = picture;
        // Resizing the canvas also clears it, so the draw must come after.
        canvas.width = picture.width;
        canvas.height = picture.height;
        ctx.drawImage(picture, 0, 0);
        placeholder.style.display = 'none';
        currentImageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
    };

    const fileReader = new FileReader();
    fileReader.onload = () => {
        // Assigning the data URL starts decoding; picture.onload fires when done.
        picture.src = fileReader.result;
    };
    fileReader.readAsDataURL(file);
}
488
+
489
+ // Preprocess image (letterbox with black padding)
490
/**
 * Letterbox an image into the model input size and convert it to a tensor.
 *
 * The image is scaled to fit inside `inputSize` while keeping its aspect
 * ratio, centred on a black background, and converted to a planar
 * (channel-first) float32 tensor with values in [0, 1].
 *
 * @param {ImageData} imageData - Source pixels.
 * @param {[number, number]} [inputSize=[640, 640]] - Model input as [width, height].
 * @returns {{tensor: object, paddingX: number, paddingY: number, scaleX: number, scaleY: number}}
 *   `tensor` is an `ort.Tensor` of dims [1, 3, inputH, inputW]; `paddingX`/`paddingY`
 *   are the letterbox offsets in model pixels; `scaleX`/`scaleY` convert a
 *   length in model pixels back to source-image pixels.
 */
function preprocessImage(imageData, inputSize = [640, 640]) {
    const [inputW, inputH] = inputSize;
    const imgWidth = imageData.width;
    const imgHeight = imageData.height;

    // Create padded canvas
    const paddedCanvas = document.createElement('canvas');
    const paddedCtx = paddedCanvas.getContext('2d');
    paddedCanvas.width = inputW;
    paddedCanvas.height = inputH;

    // Fill with black
    paddedCtx.fillStyle = '#000000';
    paddedCtx.fillRect(0, 0, inputW, inputH);

    // Decide which side of the image touches the model input border.
    const aspectRatio = imgWidth / imgHeight;
    const targetAspectRatio = inputW / inputH;

    let drawWidth;
    let drawHeight;
    let offsetX;
    let offsetY;

    // Math.max(1, ...) keeps the drawn area at least one pixel wide/high.
    // For extremely elongated images the floored size would otherwise be 0,
    // which draws nothing and makes the returned scale factor Infinity.
    if (aspectRatio > targetAspectRatio) {
        drawWidth = inputW;
        drawHeight = Math.max(1, Math.floor(inputW / aspectRatio));
        offsetX = 0;
        offsetY = Math.floor((inputH - drawHeight) / 2);
    } else {
        drawHeight = inputH;
        drawWidth = Math.max(1, Math.floor(inputH * aspectRatio));
        offsetX = Math.floor((inputW - drawWidth) / 2);
        offsetY = 0;
    }

    // putImageData cannot scale, so the pixels go onto a scratch canvas
    // first and are then drawn (scaled) onto the padded canvas.
    const tempCanvas = document.createElement('canvas');
    const tempCtx = tempCanvas.getContext('2d');
    tempCanvas.width = imgWidth;
    tempCanvas.height = imgHeight;
    tempCtx.putImageData(imageData, 0, 0);

    paddedCtx.drawImage(tempCanvas, 0, 0, imgWidth, imgHeight,
        offsetX, offsetY, drawWidth, drawHeight);

    const rgba = paddedCtx.getImageData(0, 0, inputW, inputH).data;

    // Convert interleaved RGBA bytes to planar RGB floats (alpha is dropped).
    const planeSize = inputW * inputH;
    const data = new Float32Array(planeSize * 3);
    for (let pixel = 0, src = 0; pixel < planeSize; pixel++, src += 4) {
        data[pixel] = rgba[src] / 255;                     // R plane
        data[pixel + planeSize] = rgba[src + 1] / 255;     // G plane
        data[pixel + 2 * planeSize] = rgba[src + 2] / 255; // B plane
    }

    return {
        tensor: new ort.Tensor('float32', data, [1, 3, inputH, inputW]),
        paddingX: offsetX,
        paddingY: offsetY,
        scaleX: imgWidth / drawWidth,
        scaleY: imgHeight / drawHeight
    };
}
552
+
553
+ // Postprocess results
554
/**
 * Convert the raw model output into person boxes in source-image pixels.
 *
 * The output is read as `output.dims[1]` rows of six values each:
 * x1, y1, x2, y2, confidence, classId (corner coordinates in model-input
 * pixels). NOTE(review): this row layout is what the indexing assumes;
 * confirm it against the exported model.
 *
 * Rows below the module-level `confidenceThreshold` or with a class other
 * than 0 (person) are dropped. Remaining boxes have the letterbox padding
 * removed, are scaled back to the source image, clamped to the image
 * bounds, and finally passed through `applyNMS` with an IoU threshold of 0.45.
 *
 * @param {{data: ArrayLike<number>, dims: number[]}} output - Model output tensor.
 * @param {number} originalWidth - Source image width, used as the right clamp.
 * @param {number} originalHeight - Source image height, used as the bottom clamp.
 * @param {number} paddingX - Horizontal letterbox offset in model pixels.
 * @param {number} paddingY - Vertical letterbox offset in model pixels.
 * @param {number} scaleX - Model-pixel to source-pixel factor along x.
 * @param {number} scaleY - Model-pixel to source-pixel factor along y.
 * @returns {Array<{x: number, y: number, width: number, height: number, confidence: number, class: string}>}
 */
function postprocessResults(output, originalWidth, originalHeight, paddingX, paddingY, scaleX, scaleY) {
    const VALUES_PER_ROW = 6;
    const PERSON_CLASS_ID = 0;

    const outputData = output.data;
    const numDetections = output.dims[1];

    // Callers that omit the image size keep the old "no upper bound" behavior.
    const maxX = Number.isFinite(originalWidth) ? originalWidth : Infinity;
    const maxY = Number.isFinite(originalHeight) ? originalHeight : Infinity;
    const clamp = (value, upper) => Math.min(Math.max(value, 0), upper);

    const detections = [];

    for (let i = 0; i < numDetections; i++) {
        const startIdx = i * VALUES_PER_ROW;
        const confidence = outputData[startIdx + 4];
        const classId = Math.round(outputData[startIdx + 5]);

        // Filter by confidence and class (0 = person)
        if (confidence < confidenceThreshold || classId !== PERSON_CLASS_ID) continue;

        // Undo the letterbox (remove padding, then rescale), and clamp each
        // corner to the image. Clamping the corners before taking the
        // difference keeps the right/bottom edge where the model put it;
        // clamping only the origin would shift the whole box.
        const left = clamp((outputData[startIdx] - paddingX) * scaleX, maxX);
        const top = clamp((outputData[startIdx + 1] - paddingY) * scaleY, maxY);
        const right = clamp((outputData[startIdx + 2] - paddingX) * scaleX, maxX);
        const bottom = clamp((outputData[startIdx + 3] - paddingY) * scaleY, maxY);

        detections.push({
            x: left,
            y: top,
            width: Math.max(0, right - left),
            height: Math.max(0, bottom - top),
            confidence,
            class: 'person'
        });
    }

    // Apply NMS
    return applyNMS(detections, 0.45);
}
590
+
591
+ // Non-Maximum Suppression
592
/**
 * Greedy non-maximum suppression.
 *
 * Detections are visited from highest to lowest confidence. Each visited
 * detection is kept and every remaining detection whose IoU with it is
 * strictly greater than `iouThreshold` is suppressed.
 *
 * The input array is not modified; a sorted copy is used internally.
 *
 * @param {Array<{confidence: number}>} detections - Candidate boxes (as accepted by calculateIoU).
 * @param {number} iouThreshold - Overlap above which the weaker box is dropped.
 * @returns {Array} Kept detections, ordered by descending confidence.
 */
function applyNMS(detections, iouThreshold) {
    if (detections.length === 0) return [];

    // Array.prototype.sort works in place, so sort a copy and leave the
    // caller's array in its original order.
    const ranked = [...detections].sort((a, b) => b.confidence - a.confidence);
    const suppressed = new Array(ranked.length).fill(false);
    const selected = [];

    for (let i = 0; i < ranked.length; i++) {
        if (suppressed[i]) continue;

        selected.push(ranked[i]);

        // Only lower-ranked boxes can still be suppressed by this one.
        for (let j = i + 1; j < ranked.length; j++) {
            if (suppressed[j]) continue;

            if (calculateIoU(ranked[i], ranked[j]) > iouThreshold) {
                suppressed[j] = true;
            }
        }
    }

    return selected;
}
618
+
619
+ // Calculate IoU
620
/**
 * Intersection-over-union of two axis-aligned boxes.
 *
 * @param {{x: number, y: number, width: number, height: number}} det1
 * @param {{x: number, y: number, width: number, height: number}} det2
 * @returns {number} 0 when the boxes do not overlap (touching edges count as
 *   no overlap), otherwise intersection area divided by union area.
 */
function calculateIoU(det1, det2) {
    const { x: ax, y: ay, width: aw, height: ah } = det1;
    const { x: bx, y: by, width: bw, height: bh } = det2;

    // Edges of the overlap rectangle.
    const left = Math.max(ax, bx);
    const top = Math.max(ay, by);
    const right = Math.min(ax + aw, bx + bw);
    const bottom = Math.min(ay + ah, by + bh);

    const overlaps = !(right <= left || bottom <= top);
    if (!overlaps) {
        return 0;
    }

    const shared = (right - left) * (bottom - top);
    const combined = aw * ah + bw * bh - shared;
    return shared / combined;
}
635
+
636
+ // Draw detections
637
/**
 * Repaint the canvas with the current image and overlay the detections.
 *
 * Each detection gets a coloured rectangle and a "<class> <score>%" label.
 * The label sits just above the box; when the box is too close to the top
 * edge for that, the label is drawn inside the box instead so it stays
 * visible.
 *
 * Does nothing when no image is loaded.
 *
 * @param {Array<{x: number, y: number, width: number, height: number, confidence: number, class: string}>} detections
 */
function drawDetections(detections) {
    if (!currentImage) return;

    const LABEL_HEIGHT = 24;
    const LABEL_PADDING_X = 6;
    const LABEL_BASELINE = 17; // text baseline measured from the top of the label strip

    // Redraw original image (wipes boxes from a previous run)
    ctx.drawImage(currentImage, 0, 0);

    // Draw boxes
    detections.forEach((det, index) => {
        // Step the hue per detection so neighbouring boxes are distinguishable.
        const color = `hsl(${160 + index * 20}, 100%, 50%)`;

        ctx.strokeStyle = color;
        ctx.lineWidth = 3;
        ctx.strokeRect(det.x, det.y, det.width, det.height);

        const label = `${det.class} ${(det.confidence * 100).toFixed(0)}%`;
        ctx.font = '14px -apple-system, sans-serif';
        const textWidth = ctx.measureText(label).width;

        // Above the box when there is room, otherwise inside its top edge;
        // a negative y would put the label outside the canvas.
        const labelTop = det.y >= LABEL_HEIGHT ? det.y - LABEL_HEIGHT : det.y;

        // Draw label background
        ctx.fillStyle = color;
        ctx.fillRect(det.x, labelTop, textWidth + 2 * LABEL_PADDING_X, LABEL_HEIGHT);

        ctx.fillStyle = '#000';
        ctx.fillText(label, det.x + LABEL_PADDING_X, labelTop + LABEL_BASELINE);
    });
}
663
+
664
+ // Update detections list
665
/**
 * Render the sidebar list of detections.
 *
 * Shows a "No detections" note for an empty array; otherwise one card per
 * detection with its 1-based index, confidence percentage and bounding box.
 * All interpolated values are numbers formatted with toFixed.
 *
 * @param {Array<{x: number, y: number, width: number, height: number, confidence: number}>} detections
 */
function updateDetectionsList(detections) {
    if (detections.length === 0) {
        detectionsListEl.innerHTML = '<p style="color: #8892b0; font-size: 0.9rem;">No detections</p>';
        return;
    }

    const cards = [];
    detections.forEach((det, i) => {
        cards.push(`
            <div class="detection-item">
                <div class="class-name">Person ${i + 1}</div>
                <div class="confidence">Confidence: ${(det.confidence * 100).toFixed(1)}%</div>
                <div class="bbox">[${det.x.toFixed(0)}, ${det.y.toFixed(0)}, ${det.width.toFixed(0)}x${det.height.toFixed(0)}]</div>
            </div>
        `);
    });
    detectionsListEl.innerHTML = cards.join('');
}
679
+
680
+ // Detect
681
// Detect: run preprocessing, inference and postprocessing on the loaded
// image, then update the canvas, the statistics and the detections list.
detectBtn.addEventListener('click', async () => {
    // Work on a snapshot of the image. The global can be cleared or replaced
    // while inference is awaited, and reading it afterwards would either
    // throw (null) or mix two different images.
    const imageData = currentImageData;
    if (!session || !imageData) return;

    detectBtn.disabled = true;
    detectBtn.textContent = 'Detecting...';

    try {
        const startTime = performance.now();

        // Preprocess
        const { tensor, paddingX, paddingY, scaleX, scaleY } = preprocessImage(imageData);

        // Inference
        const results = await session.run({ images: tensor });
        const output = results.output0;

        // The user cleared the canvas or loaded another image in the
        // meantime: these results no longer match what is displayed, so
        // drop them. The finally block still restores the button.
        if (currentImageData !== imageData) return;

        // Postprocess
        const detections = postprocessResults(
            output,
            imageData.width,
            imageData.height,
            paddingX,
            paddingY,
            scaleX,
            scaleY
        );

        const endTime = performance.now();
        // Covers preprocessing, inference and postprocessing.
        const inferenceTime = Math.round(endTime - startTime);

        // Update UI
        drawDetections(detections);
        personCountEl.textContent = detections.length;
        inferenceTimeEl.textContent = inferenceTime;
        updateDetectionsList(detections);

        console.log(`Detected ${detections.length} people in ${inferenceTime}ms`);
    } catch (error) {
        console.error('Detection failed:', error);
        alert('Detection failed: ' + error.message);
    } finally {
        detectBtn.disabled = false;
        detectBtn.textContent = 'Detect People';
    }
});
726
+
727
+ // Clear
728
// Clear: wipe the canvas, forget the loaded image and reset every readout
// to its initial state.
function resetDemo() {
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    placeholder.style.display = 'block';

    currentImage = null;
    currentImageData = null;

    personCountEl.textContent = '0';
    inferenceTimeEl.textContent = '0';
    detectionsListEl.innerHTML = '<p style="color: #8892b0; font-size: 0.9rem;">No detections yet</p>';

    // Emptying the input lets the same file be picked again afterwards.
    fileInput.value = '';
}

clearBtn.addEventListener('click', resetDemo);

// Confidence slider: store the new threshold and echo it with two decimals.
// The value only takes effect on the next detection run.
function onConfidenceInput() {
    confidenceThreshold = Number.parseFloat(confidenceSlider.value);
    confidenceValue.textContent = confidenceThreshold.toFixed(2);
}

confidenceSlider.addEventListener('input', onConfidenceInput);
744
+ </script>
745
+ </body>
746
+ </html>