scrypted-detection-trainer 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scrypted-detection-trainer",
3
- "version": "0.1.9",
3
+ "version": "0.1.11",
4
4
  "description": "Collect and label detections to fine-tune the Scrypted NVR object detection model.",
5
5
  "keywords": [
6
6
  "scrypted-plugin"
package/src/main.ts CHANGED
@@ -157,6 +157,13 @@ class DetectionTrainer extends ScryptedDeviceBase implements Settings, HttpReque
157
157
  readonly: true,
158
158
  value: `<a href="${uiUrl}" target="_blank" style="display:inline-block;padding:8px 16px;background:#1a4d8a;color:#fff;border-radius:6px;text-decoration:none;font-size:13px;">Open Review UI ↗</a>`,
159
159
  },
160
+ {
161
+ key: 'autoCapture',
162
+ title: 'Auto-Capture',
163
+ description: 'Automatically capture detections in the background. Disable to use manual browsing only.',
164
+ type: 'boolean',
165
+ value: (this.storage.getItem('autoCapture') ?? 'true'),
166
+ },
160
167
  ];
161
168
 
162
169
  for (const cam of cameras) {
@@ -214,6 +221,7 @@ class DetectionTrainer extends ScryptedDeviceBase implements Settings, HttpReque
214
221
  // ── Detection Handler ─────────────────────────────────────────────────────
215
222
 
216
223
  private async onDetection(cameraId: string, cameraName: string, data: ObjectsDetected, rateLimitMs: number) {
224
+ if ((this.storage.getItem('autoCapture') ?? 'true') === 'false') return;
217
225
  if (!data?.detections?.length || !data.inputDimensions) return;
218
226
 
219
227
  // Rate limit per camera
@@ -283,6 +291,22 @@ class DetectionTrainer extends ScryptedDeviceBase implements Settings, HttpReque
283
291
  const url = new URL(request.url, 'http://localhost');
284
292
  const path = url.pathname.replace(request.rootPath, '');
285
293
 
// Serve a browse-event thumbnail as JPEG via the camera's getVideoClipThumbnail.
// Query parameters: cameraId and thumbnailId. Characters outside a small
// whitelist are stripped from both values; if either one is missing or ends up
// empty the request is rejected with 400. Any failure to produce the image is
// answered with 404 so the UI can simply leave the canvas blank.
if (path === '/api/browse-img') {
  const params = new URL(request.url, 'http://localhost').searchParams;
  const cameraId = params.get('cameraId')?.replace(/[^a-zA-Z0-9_\-]/g, '');
  const thumbnailId = params.get('thumbnailId')?.replace(/[^a-zA-Z0-9_\-:.]/g, '');
  if (!cameraId || !thumbnailId) return response.send('Missing params', { code: 400 });
  try {
    const cam = systemManager.getDeviceById(cameraId) as any;
    // Unknown device id: answer directly instead of relying on a thrown TypeError.
    if (!cam) return response.send('Image unavailable', { code: 404 });
    const mo = await cam.getVideoClipThumbnail(thumbnailId);
    const jpeg = await mediaManager.convertMediaObjectToBuffer(mo, 'image/jpeg');
    return response.send(jpeg, { headers: { 'Content-Type': 'image/jpeg', 'Cache-Control': 'max-age=3600' } });
  } catch (e) {
    // Keep the 404 contract, but leave a trace so failures can be diagnosed.
    this.console.warn('Could not serve browse thumbnail:', e);
    return response.send('Image unavailable', { code: 404 });
  }
}
309
+
286
310
  // Serve image
287
311
  if (path.startsWith('/img/')) {
288
312
  const id = path.slice(5).replace(/[^a-zA-Z0-9_\-]/g, ''); // sanitize
@@ -308,6 +332,91 @@ class DetectionTrainer extends ScryptedDeviceBase implements Settings, HttpReque
308
332
  return response.send(JSON.stringify({ ok: true }), { headers: { 'Content-Type': 'application/json' } });
309
333
  }
310
334
 
// API: list the devices offered in the Browse tab's camera selector.
// A device qualifies when it is a Camera or Doorbell and exposes the
// ObjectDetector interface. Responds with a JSON array of { id, name }.
if (path === '/api/cameras') {
  const cameras = [];
  for (const deviceId of Object.keys(systemManager.getSystemState())) {
    const device = systemManager.getDeviceById(deviceId);
    if (!device) continue;

    const cameraLike =
      device.type === ScryptedDeviceType.Camera || device.type === ScryptedDeviceType.Doorbell;
    if (!cameraLike) continue;
    if (!device.interfaces?.includes(ScryptedInterface.ObjectDetector)) continue;

    cameras.push({ id: device.id, name: device.name });
  }
  return response.send(JSON.stringify(cameras), { headers: { 'Content-Type': 'application/json' } });
}
346
+
// API: browse recent events for a camera.
// Query parameters:
//   cameraId - required; device id to query.
//   hours    - look-back window in hours; defaults to 24.
// Responds with a JSON array (at most 100 entries) of clips that carry both
// detection classes and a thumbnail id, or with { error } and code 500 when
// the clip query throws.
if (path === '/api/browse') {
  const params = new URL(request.url, 'http://localhost').searchParams;
  const cameraId = params.get('cameraId');
  if (!cameraId) return response.send('Missing cameraId', { code: 400 });

  // A non-numeric value would make startTime NaN and a non-positive one would
  // produce an empty or future window, so both fall back to the default.
  const requestedHours = Number.parseInt(params.get('hours') ?? '', 10);
  const hours = Number.isFinite(requestedHours) && requestedHours > 0 ? requestedHours : 24;

  const jsonHeaders = { 'Content-Type': 'application/json' };
  try {
    const cam = systemManager.getDeviceById(cameraId) as any;
    if (!cam) return response.send('Camera not found', { code: 404 });

    const endTime = Date.now();
    const startTime = endTime - hours * 3600 * 1000;
    const clips = await cam.getVideoClips({ startTime, endTime });

    const events = (clips || [])
      .filter((c: any) => c.detectionClasses?.length && c.thumbnailId)
      .slice(0, 100)
      .map((c: any) => ({
        clipId: c.id,
        thumbnailId: c.thumbnailId,
        timestamp: c.startTime,
        // Non-empty here: the filter above already required a length.
        detectionClasses: c.detectionClasses,
        // bounding box not available at clip level — use full frame
        boundingBox: null,
        inputDimensions: null,
        cameraId,
        cameraName: cam.name,
      }));

    return response.send(JSON.stringify(events), { headers: jsonHeaders });
  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e);
    return response.send(JSON.stringify({ error: message }), { headers: jsonHeaders, code: 500 });
  }
}
382
+
// API: add a browsed event directly to the dataset as an already-labeled capture.
// Expects a JSON body with: cameraId, cameraName, thumbnailId, timestamp,
// detectedClass, boundingBox, inputDimensions, label.
// Responses (all JSON):
//   { ok: true }        - label missing or 'discard'; nothing is stored.
//   { ok: true, id }    - capture stored under the returned id.
//   { error } with 400  - body is not valid JSON, or label/timestamp is malformed.
//   { error } with 500  - the thumbnail could not be retrieved.
if (path === '/api/add-event' && request.body) {
  const jsonHeaders = { 'Content-Type': 'application/json' };
  const rawBody = request.body as any;

  // A malformed body must produce a 400, not an exception escaping the handler.
  let body: any;
  try {
    body = JSON.parse(typeof rawBody === 'string' ? rawBody : Buffer.isBuffer(rawBody) ? rawBody.toString() : String(rawBody));
  } catch (e) {
    return response.send(JSON.stringify({ error: 'Invalid JSON body' }), { headers: jsonHeaders, code: 400 });
  }
  if (!body || typeof body !== 'object') {
    return response.send(JSON.stringify({ error: 'Invalid JSON body' }), { headers: jsonHeaders, code: 400 });
  }
  const { cameraId, cameraName, thumbnailId, timestamp, detectedClass, boundingBox, inputDimensions, label } = body;

  if (!label || label === 'discard') return response.send(JSON.stringify({ ok: true }), { headers: jsonHeaders });
  if (typeof label !== 'string') {
    return response.send(JSON.stringify({ error: 'Invalid label' }), { headers: jsonHeaders, code: 400 });
  }

  // The timestamp becomes part of the capture id, which is handed to saveImage
  // and later requested through the /img/ route; that route only accepts
  // [a-zA-Z0-9_-]. Accept nothing but a finite number here.
  const ts = typeof timestamp === 'string' && timestamp.trim() !== '' ? Number(timestamp) : timestamp;
  if (typeof ts !== 'number' || !Number.isFinite(ts)) {
    return response.send(JSON.stringify({ error: 'Invalid timestamp' }), { headers: jsonHeaders, code: 400 });
  }

  // Get image via thumbnail
  let jpeg: Buffer | undefined;
  try {
    const cam = systemManager.getDeviceById(cameraId) as any;
    const mo = await cam.getVideoClipThumbnail(thumbnailId);
    jpeg = await mediaManager.convertMediaObjectToBuffer(mo, 'image/jpeg');
  } catch (e) {
    this.console.warn(`Could not get thumbnail for browse event:`, e);
  }

  if (!jpeg) return response.send(JSON.stringify({ error: 'Could not retrieve image' }), { headers: jsonHeaders, code: 500 });

  // Math.trunc keeps a fractional timestamp from putting a '.' into the id.
  const id = `browse-${Math.trunc(ts)}-${Math.random().toString(36).slice(2, 6)}`;
  // For clips we don't have per-detection bounding boxes — store full frame dimensions
  // (1920x1080 is assumed when the client sends no inputDimensions).
  const record: CaptureRecord = {
    id, cameraId, cameraName, timestamp: ts,
    detectedClass: detectedClass || 'unknown',
    score: 1,
    boundingBox: boundingBox || [0, 0, inputDimensions?.[0] || 1920, inputDimensions?.[1] || 1080],
    inputDimensions: inputDimensions || [1920, 1080],
    reviewed: true, label,
  };
  this.captures.set(id, record);
  this.saveImage(id, jpeg);
  this.saveCaptures();

  return response.send(JSON.stringify({ ok: true, id }), { headers: jsonHeaders });
}
419
+
311
420
  // API: get pending captures
312
421
  if (path === '/api/pending') {
313
422
  const pending = [...this.captures.values()]
@@ -508,6 +617,7 @@ class DetectionTrainer extends ScryptedDeviceBase implements Settings, HttpReque
508
617
  <div class="card">
509
618
  <div class="tab-bar">
510
619
  <div class="tab active" onclick="showTab('review')">Review</div>
620
+ <div class="tab" onclick="showTab('browse')">Browse Events</div>
511
621
  <div class="tab" onclick="showTab('labeled')">Labeled</div>
512
622
  <div class="tab" onclick="showTab('stats')">Stats</div>
513
623
  <div class="tab" onclick="showTab('export')">Export Dataset</div>
@@ -518,12 +628,30 @@ class DetectionTrainer extends ScryptedDeviceBase implements Settings, HttpReque
518
628
  <div id="detections-list"></div>
519
629
  </div>
520
630
 
631
+ <!-- Browse tab -->
632
+ <div class="tab-panel" id="tab-browse">
633
+ <div class="tab-content">
634
+ <div style="display:flex;gap:12px;align-items:center;flex-wrap:wrap;margin-bottom:16px;">
635
+ <select id="browse-camera" style="padding:8px 12px;background:#222;border:1px solid #444;color:#fff;border-radius:6px;font-size:13px;">
636
+ <option value="">Select camera…</option>
637
+ </select>
638
+ <select id="browse-hours" style="padding:8px 12px;background:#222;border:1px solid #444;color:#fff;border-radius:6px;font-size:13px;">
639
+ <option value="1">Last 1 hour</option>
640
+ <option value="6">Last 6 hours</option>
641
+ <option value="24" selected>Last 24 hours</option>
642
+ <option value="72">Last 3 days</option>
643
+ </select>
644
+ <button class="export-btn" onclick="loadBrowse()" style="padding:8px 16px;">Load Events</button>
645
+ <span id="browse-status" style="font-size:13px;color:#888;"></span>
646
+ </div>
647
+ <div id="browse-list"></div>
648
+ </div>
649
+ </div>
650
+
521
651
  <!-- Labeled tab -->
522
652
  <div class="tab-panel" id="tab-labeled">
523
653
  <div id="labeled-list"></div>
524
654
  </div>
525
-
526
- <!-- Stats tab -->
527
655
  <div class="tab-panel" id="tab-stats">
528
656
  <div class="tab-content">
529
657
  <p style="font-size:13px;color:#888;margin-bottom:16px;">Breakdown of captured and labeled detections.</p>
@@ -670,6 +798,22 @@ function openLightbox(r) {
670
798
  document.addEventListener('keydown', lbKeyHandler);
671
799
  }
672
800
 
/**
 * Show an already-loaded image in the shared lightbox.
 *
 * The image is scaled down (never up) to fit within 90% of the viewport width
 * and 80% of its height, drawn onto #lightbox-canvas, and captioned with the
 * camera name and the localized timestamp.
 *
 * @param img        Loaded image element to display.
 * @param cameraName Camera name shown in the caption.
 * @param timestamp  Value accepted by the Date constructor; shown in the caption.
 */
function openLightboxImg(img, cameraName, timestamp) {
  const { naturalWidth, naturalHeight } = img;

  // The trailing 1 caps the factor so small images keep their native size.
  const fit = Math.min(
    (window.innerWidth * 0.9) / naturalWidth,
    (window.innerHeight * 0.8) / naturalHeight,
    1
  );

  const canvas = document.getElementById('lightbox-canvas');
  canvas.width = Math.round(naturalWidth * fit);
  canvas.height = Math.round(naturalHeight * fit);
  canvas.getContext('2d').drawImage(img, 0, 0, canvas.width, canvas.height);

  const caption = cameraName + ' · ' + new Date(timestamp).toLocaleString();
  document.getElementById('lightbox-meta').textContent = caption;

  document.getElementById('lightbox').classList.add('open');
  document.addEventListener('keydown', lbKeyHandler);
}
816
+
673
817
  function closeLightbox() {
674
818
  document.getElementById('lightbox').classList.remove('open');
675
819
  document.removeEventListener('keydown', lbKeyHandler);
@@ -680,7 +824,7 @@ function lbKeyHandler(e) {
680
824
  }
681
825
 
682
826
  function showTab(name) {
683
- const names = ['review', 'labeled', 'stats', 'export'];
827
+ const names = ['review', 'browse', 'labeled', 'stats', 'export'];
684
828
  document.querySelectorAll('.tab').forEach((t, i) => {
685
829
  t.classList.toggle('active', names[i] === name);
686
830
  });
@@ -689,6 +833,250 @@ function showTab(name) {
689
833
  if (name === 'stats') loadStats();
690
834
  if (name === 'export') loadExportInfo();
691
835
  if (name === 'labeled') loadLabeled(0);
836
+ if (name === 'browse') initBrowse();
837
+ }
838
+
/**
 * Populate the Browse tab's camera selector from /api/cameras.
 *
 * Does nothing when the selector already holds more than its placeholder
 * option. When exactly one camera is returned it is preselected. On any
 * failure an error message is written to #browse-status.
 */
async function initBrowse() {
  const cameraSelect = document.getElementById('browse-camera');
  if (cameraSelect.options.length > 1) return; // already loaded

  try {
    const response = await fetch(BASE + '/api/cameras');
    const cameras = await response.json();

    for (const { id, name } of cameras) {
      const option = Object.assign(document.createElement('option'), {
        value: id,
        textContent: name,
      });
      cameraSelect.appendChild(option);
    }

    if (cameras.length === 1) {
      cameraSelect.value = cameras[0].id;
    }
  } catch (e) {
    document.getElementById('browse-status').textContent = 'Error loading cameras';
  }
}
856
+
857
+ async function loadBrowse() {
858
+ const cameraId = document.getElementById('browse-camera').value;
859
+ const hours = document.getElementById('browse-hours').value;
860
+ const status = document.getElementById('browse-status');
861
+ const list = document.getElementById('browse-list');
862
+
863
+ if (!cameraId) { status.textContent = 'Select a camera first'; return; }
864
+
865
+ status.textContent = 'Loading…';
866
+ list.innerHTML = '';
867
+
868
+ try {
869
+ const res = await fetch(BASE + '/api/browse?cameraId=' + cameraId + '&hours=' + hours);
870
+ const events = await res.json();
871
+
872
+ if (events.error) { status.textContent = 'Error: ' + events.error; return; }
873
+ if (!events.length) { status.textContent = 'No detection events found.'; list.innerHTML = '<div class="empty"><div class="icon">🔍</div><div>No ObjectDetector events in this time range.</div></div>'; return; }
874
+
875
+ status.textContent = events.length + ' events found';
876
+
877
+ list.innerHTML = events.map((ev, i) => {
878
+ const date = new Date(ev.timestamp).toLocaleString();
879
+ const dets = ev.detections || [];
880
+ const primary = dets[0] || {};
881
+ const score = Math.round((primary.score || 0) * 100);
882
+ const allClasses = dets.map(d => d.className + ' ' + Math.round((d.score||0)*100) + '%').join(', ');
883
+ return \`
884
+ <div class="detection" id="bev-\${i}" style="opacity:1;transition:opacity .3s">
885
+ <div class="detection-imgs">
886
+ <div class="img-panel">
887
+ <div class="img-label">Full frame</div>
888
+ <canvas id="bcanvas-\${i}" class="det-canvas" width="240" height="160"></canvas>
889
+ </div>
890
+ <div class="img-panel" id="bcrop-panel-\${i}">
891
+ <div class="img-label">Crop</div>
892
+ <canvas id="bcanvas-crop-\${i}" class="det-canvas" width="160" height="160"></canvas>
893
+ </div>
894
+ </div>
895
+ <div class="detection-info">
896
+ <div class="detection-meta">
897
+ <div><strong>\${ev.cameraName}</strong></div>
898
+ <div>\${date}</div>
899
+ <div class="det-class-badge">\${allClasses}</div>
900
+ </div>
901
+ <div style="font-size:12px;color:#888;">Add to dataset as:</div>
902
+ <div class="label-buttons" id="blabels-\${i}">
903
+ <button class="label-btn person" onclick="addEvent(\${i})('person')">👤 Person</button>
904
+ <button class="label-btn animal" onclick="addEvent(\${i})('animal')">🐾 Animal</button>
905
+ <button class="label-btn face" onclick="addEvent(\${i})('face')">😀 Face</button>
906
+ <button class="label-btn vehicle" onclick="addEvent(\${i})('vehicle')">🚗 Vehicle</button>
907
+ <button class="label-btn" onclick="addEvent(\${i})('plate')">🔢 Plate</button>
908
+ <button class="label-btn" onclick="addEvent(\${i})('package')">📦 Package</button>
909
+ <button class="label-btn discard" onclick="addEvent(\${i})('discard')">🗑 Skip</button>
910
+ </div>
911
+ </div>
912
+ </div>\`;
913
+ }).join('');
914
+
915
+ // Load images for each event
916
+ for (let i = 0; i < events.length; i++) {
917
+ const ev = events[i];
918
+ loadBrowseImage(i, ev);
919
+ }
920
+
921
+ } catch(e) {
922
+ status.textContent = 'Error: ' + e.message;
923
+ }
924
+ }
925
+
926
+ // Store browse events for addEvent closure
927
+ let browseEvents = [];
928
+
929
+ async function loadBrowse() {
930
+ const cameraId = document.getElementById('browse-camera').value;
931
+ const hours = document.getElementById('browse-hours').value;
932
+ const status = document.getElementById('browse-status');
933
+ const list = document.getElementById('browse-list');
934
+
935
+ if (!cameraId) { status.textContent = 'Select a camera first'; return; }
936
+
937
+ status.textContent = 'Loading…';
938
+ list.innerHTML = '';
939
+ browseEvents = [];
940
+
941
+ try {
942
+ const res = await fetch(BASE + '/api/browse?cameraId=' + cameraId + '&hours=' + hours);
943
+ const events = await res.json();
944
+
945
+ if (events.error) { status.textContent = 'Error: ' + events.error; return; }
946
+ if (!events.length) {
947
+ status.textContent = 'No detection events found.';
948
+ list.innerHTML = '<div class="empty"><div class="icon">🔍</div><div>No ObjectDetector events in this time range.</div></div>';
949
+ return;
950
+ }
951
+
952
+ browseEvents = events;
953
+ status.textContent = events.length + ' events';
954
+
955
+ list.innerHTML = events.map((ev, i) => {
956
+ const date = new Date(ev.timestamp).toLocaleString();
957
+ const dets = ev.detections || [];
958
+ const allClasses = [...new Set(dets.map(d => d.className))].join(', ');
959
+ return \`
960
+ <div class="detection" id="bev-\${i}">
961
+ <div class="detection-imgs">
962
+ <div class="img-panel">
963
+ <div class="img-label">Full frame</div>
964
+ <canvas id="bcanvas-full-\${i}" class="det-canvas" width="240" height="160"></canvas>
965
+ </div>
966
+ <div class="img-panel">
967
+ <div class="img-label">Crop</div>
968
+ <canvas id="bcanvas-crop-\${i}" class="det-canvas" width="160" height="160"></canvas>
969
+ </div>
970
+ </div>
971
+ <div class="detection-info">
972
+ <div class="detection-meta">
973
+ <div><strong>\${ev.cameraName}</strong></div>
974
+ <div>\${date}</div>
975
+ <div class="det-class-badge">\${allClasses}</div>
976
+ </div>
977
+ <div style="font-size:12px;color:#888;">Add to dataset as:</div>
978
+ <div class="label-buttons">
979
+ <button class="label-btn person" onclick="addBrowseEvent(\${i},'person')">👤 Person</button>
980
+ <button class="label-btn animal" onclick="addBrowseEvent(\${i},'animal')">🐾 Animal</button>
981
+ <button class="label-btn face" onclick="addBrowseEvent(\${i},'face')">😀 Face</button>
982
+ <button class="label-btn vehicle" onclick="addBrowseEvent(\${i},'vehicle')">🚗 Vehicle</button>
983
+ <button class="label-btn" onclick="addBrowseEvent(\${i},'plate')">🔢 Plate</button>
984
+ <button class="label-btn" onclick="addBrowseEvent(\${i},'package')">📦 Package</button>
985
+ <button class="label-btn discard" onclick="addBrowseEvent(\${i},'discard')">🗑 Skip</button>
986
+ </div>
987
+ </div>
988
+ </div>\`;
989
+ }).join('');
990
+
991
+ // Load thumbnails for each event
992
+ for (let i = 0; i < events.length; i++) {
993
+ loadBrowseImage(i, events[i]);
994
+ }
995
+
996
+ } catch(e) {
997
+ status.textContent = 'Error: ' + e.message;
998
+ }
999
+ }
1000
+
/**
 * Fetch the thumbnail for browse event i and paint it into that event's card.
 *
 * The image is letterboxed into canvas #bcanvas-full-<i> with the event's
 * detection classes written along the bottom edge; clicking the canvas opens
 * the lightbox. The crop panel of the card is hidden because clip thumbnails
 * carry no bounding box. The loaded image is cached in imgCache under
 * 'browse-<i>'. Network and decoding failures leave the canvas blank.
 *
 * This function is served to the browser as plain script text, so it must not
 * contain TypeScript-only syntax such as type assertions.
 *
 * @param i  Index of the event in the rendered list.
 * @param ev Event object; cameraId, thumbnailId, cameraName, timestamp and
 *           detectionClasses are read from it.
 */
function loadBrowseImage(i, ev) {
  const src = BASE + '/api/browse-img?cameraId=' + encodeURIComponent(ev.cameraId) +
    '&thumbnailId=' + encodeURIComponent(ev.thumbnailId);

  fetch(src)
    .then(r => r.ok ? r.blob() : null)
    .then(blob => {
      if (!blob) return;
      const url = URL.createObjectURL(blob);
      const img = new Image();
      img.onload = () => {
        try {
          imgCache.set('browse-' + i, img);
          const fullCanvas = document.getElementById('bcanvas-full-' + i);
          const cropCanvas = document.getElementById('bcanvas-crop-' + i);
          const iw = img.naturalWidth, ih = img.naturalHeight;
          // No bounding box for clip thumbnails — just draw the full image
          if (fullCanvas) {
            const ctx = fullCanvas.getContext('2d');
            const cw = fullCanvas.width, ch = fullCanvas.height;
            // Scale to fit and center, padding the remainder with a dark fill.
            const scale = Math.min(cw / iw, ch / ih);
            const dw = iw * scale, dh = ih * scale;
            ctx.fillStyle = '#111'; ctx.fillRect(0, 0, cw, ch);
            ctx.drawImage(img, (cw-dw)/2, (ch-dh)/2, dw, dh);
            // Label classes
            const labels = (ev.detectionClasses || []).join(', ');
            ctx.fillStyle = 'rgba(0,0,0,0.6)'; ctx.fillRect(0, ch-20, cw, 20);
            ctx.fillStyle = '#f90'; ctx.font = 'bold 11px sans-serif';
            ctx.fillText(labels, 4, ch-5);
            fullCanvas.onclick = () => openLightboxImg(img, ev.cameraName, ev.timestamp);
          }
          // Hide crop panel — no bounding box available
          const cropPanel = cropCanvas ? cropCanvas.closest('.img-panel') : null;
          if (cropPanel) cropPanel.style.display = 'none';
        } finally {
          // The decoded image no longer needs the blob URL.
          URL.revokeObjectURL(url);
        }
      };
      // Release the blob URL when the data cannot be decoded as an image.
      img.onerror = () => URL.revokeObjectURL(url);
      img.src = url;
    })
    // Thumbnails are best-effort: a failed load leaves the placeholder canvas.
    .catch(() => {});
}
1036
+
/**
 * Handle a label button on browse card i.
 *
 * For any label other than 'discard' the event is posted to /api/add-event;
 * 'discard' only removes the card. While the request is in flight the card is
 * dimmed and its buttons disabled; on failure it is restored and kept. After
 * success (or a skip) the card is removed and the header counters are
 * refreshed from /api/stats.
 *
 * @param i     Index into browseEvents / the rendered list.
 * @param label Label to store, or 'discard' to skip the event.
 */
async function addBrowseEvent(i, label) {
  const ev = browseEvents[i];
  if (!ev) return;
  const el = document.getElementById('bev-' + i);

  // Dim and lock the card while busy; undo both when the request fails.
  const setBusy = busy => {
    if (!el) return;
    el.style.opacity = busy ? '0.4' : '1';
    el.querySelectorAll('button').forEach(b => { b.disabled = busy; });
  };
  setBusy(true);

  if (label !== 'discard') {
    try {
      const res = await fetch(BASE + '/api/add-event', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          cameraId: ev.cameraId,
          cameraName: ev.cameraName,
          thumbnailId: ev.thumbnailId,
          timestamp: ev.timestamp,
          detectedClass: (ev.detectionClasses || [])[0] || 'unknown',
          boundingBox: null,
          inputDimensions: null,
          label,
        }),
      });
      const data = await res.json();
      if (data.error) {
        toast('Error: ' + data.error, '#633');
        setBusy(false);
        return;
      }
      toast('Added: ' + label, '#1a6');
    } catch(e) {
      toast('Failed: ' + e.message, '#633');
      setBusy(false);
      return;
    }
  } else {
    toast('Skipped', '#555');
  }

  // Remove from list after short delay
  setTimeout(() => { if (el) el.remove(); }, 400);

  // Update stats. This is cosmetic: the event has already been handled, so a
  // failed refresh must not surface as an unhandled rejection.
  try {
    const statsRes = await fetch(BASE + '/api/stats');
    const stats = await statsRes.json();
    const labeledEl = document.getElementById('stat-labeled');
    const totalEl = document.getElementById('stat-total');
    if (labeledEl) labeledEl.textContent = stats.labeled;
    if (totalEl) totalEl.textContent = stats.total;
  } catch(e) {
    console.warn('Could not refresh stats:', e);
  }
}
693
1081
 
694
1082
  const LABEL_COLORS = { person:'#4d9', animal:'#d85', face:'#6be', vehicle:'#99d', plate:'#cc9', package:'#fc9', discard:'#a44' };