@blueharford/scrypted-spatial-awareness 0.6.33 → 0.6.34

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blueharford/scrypted-spatial-awareness",
3
- "version": "0.6.33",
3
+ "version": "0.6.34",
4
4
  "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
5
5
  "author": "Joshua Seidel <blueharford>",
6
6
  "license": "Apache-2.0",
@@ -231,6 +231,13 @@ export class TopologyDiscoveryEngine {
231
231
  return { ...this.status };
232
232
  }
233
233
 
234
+ /** Get list of LLMs excluded for lack of vision support */
235
+ getExcludedVisionLlmNames(): string[] {
236
+ return this.llmDevices
237
+ .filter(l => !l.visionCapable)
238
+ .map(l => l.name || l.id);
239
+ }
240
+
234
241
  /** Get pending suggestions */
235
242
  getPendingSuggestions(): DiscoverySuggestion[] {
236
243
  return Array.from(this.suggestions.values())
@@ -262,6 +269,7 @@ export class TopologyDiscoveryEngine {
262
269
  providerType: LlmProvider;
263
270
  lastUsed: number;
264
271
  errorCount: number;
272
+ visionCapable: boolean;
265
273
  }> = [];
266
274
 
267
275
  /** Find ALL LLM devices for load balancing */
@@ -294,6 +302,7 @@ export class TopologyDiscoveryEngine {
294
302
  providerType,
295
303
  lastUsed: 0,
296
304
  errorCount: 0,
305
+ visionCapable: true,
297
306
  });
298
307
 
299
308
  this.console.log(`[Discovery] Found LLM: ${device.name}`);
@@ -348,6 +357,48 @@ export class TopologyDiscoveryEngine {
348
357
  return selected.device;
349
358
  }
350
359
 
360
+ /** Select an LLM device, excluding any IDs if provided */
361
+ private async selectLlmDevice(excludeIds: Set<string>): Promise<ChatCompletionDevice | null> {
362
+ await this.findAllLlmDevices();
363
+
364
+ if (this.llmDevices.length === 0) return null;
365
+
366
+ let bestIndex = -1;
367
+ let bestScore = Infinity;
368
+
369
+ for (let i = 0; i < this.llmDevices.length; i++) {
370
+ const llm = this.llmDevices[i];
371
+ if (excludeIds.has(llm.id)) continue;
372
+ if (!llm.visionCapable) continue;
373
+ const score = llm.lastUsed + (llm.errorCount * 60000);
374
+ if (score < bestScore) {
375
+ bestScore = score;
376
+ bestIndex = i;
377
+ }
378
+ }
379
+
380
+ if (bestIndex === -1) return null;
381
+
382
+ const selected = this.llmDevices[bestIndex];
383
+ this.llmDevice = selected.device;
384
+ this.llmProviderType = selected.providerType;
385
+ selected.lastUsed = Date.now();
386
+
387
+ this.console.log(`[Discovery] Selected LLM: ${selected.name}`);
388
+ return selected.device;
389
+ }
390
+
391
+ private isRetryableLlmError(error: any): boolean {
392
+ const errorStr = String(error).toLowerCase();
393
+ return (
394
+ errorStr.includes('404') ||
395
+ errorStr.includes('not found') ||
396
+ errorStr.includes('no such model') ||
397
+ errorStr.includes('model not found') ||
398
+ errorStr.includes('endpoint')
399
+ );
400
+ }
401
+
351
402
  /** Mark an LLM as having an error */
352
403
  private markLlmError(device: ChatCompletionDevice): void {
353
404
  const llm = this.llmDevices.find(l => l.device === device);
@@ -406,42 +457,49 @@ export class TopologyDiscoveryEngine {
406
457
  isValid: false,
407
458
  };
408
459
 
409
- const llm = await this.findLlmDevice();
410
- if (!llm?.getChatCompletion) {
411
- analysis.error = 'No LLM device available';
412
- return analysis;
413
- }
414
-
415
460
  const imageData = await this.getCameraSnapshot(cameraId);
416
461
  if (!imageData) {
417
462
  analysis.error = 'Failed to capture camera snapshot';
418
463
  return analysis;
419
464
  }
420
465
 
421
- // Try with detected provider format first, then fallback to alternates
422
- // The order matters: try the most likely formats first
423
- const formatsToTry: LlmProvider[] = [];
424
-
425
- // Start with detected format
426
- formatsToTry.push(this.llmProviderType);
427
-
428
- // Add fallbacks based on detected provider
429
- if (this.llmProviderType === 'openai') {
430
- formatsToTry.push('scrypted', 'anthropic');
431
- } else if (this.llmProviderType === 'anthropic') {
432
- formatsToTry.push('scrypted', 'openai');
433
- } else if (this.llmProviderType === 'scrypted') {
434
- formatsToTry.push('anthropic', 'openai');
435
- } else {
436
- // Unknown - try all formats
437
- formatsToTry.push('scrypted', 'anthropic', 'openai');
438
- }
439
-
466
+ await this.findAllLlmDevices();
467
+ const excludeIds = new Set<string>();
440
468
  let lastError: any = null;
469
+ const maxAttempts = Math.max(1, this.llmDevices.length || 1);
441
470
 
442
- for (const formatType of formatsToTry) {
443
- try {
444
- this.console.log(`[Discovery] Trying ${formatType} image format for ${cameraName}...`);
471
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
472
+ const llm = await this.selectLlmDevice(excludeIds);
473
+ if (!llm?.getChatCompletion) {
474
+ analysis.error = 'No LLM device available';
475
+ return analysis;
476
+ }
477
+
478
+ let allFormatsVisionError = false;
479
+
480
+ // Try with detected provider format first, then fallback to alternates
481
+ // The order matters: try the most likely formats first
482
+ const formatsToTry: LlmProvider[] = [];
483
+
484
+ // Start with detected format
485
+ formatsToTry.push(this.llmProviderType);
486
+
487
+ // Add fallbacks based on detected provider
488
+ if (this.llmProviderType === 'openai') {
489
+ formatsToTry.push('scrypted', 'anthropic');
490
+ } else if (this.llmProviderType === 'anthropic') {
491
+ formatsToTry.push('scrypted', 'openai');
492
+ } else if (this.llmProviderType === 'scrypted') {
493
+ formatsToTry.push('anthropic', 'openai');
494
+ } else {
495
+ // Unknown - try all formats
496
+ formatsToTry.push('scrypted', 'anthropic', 'openai');
497
+ }
498
+
499
+ let visionFormatFailures = 0;
500
+ for (const formatType of formatsToTry) {
501
+ try {
502
+ this.console.log(`[Discovery] Trying ${formatType} image format for ${cameraName}...`);
445
503
 
446
504
  // Build prompt with camera context (height)
447
505
  const cameraNode = this.topology ? findCamera(this.topology, cameraId) : null;
@@ -459,7 +517,7 @@ Use the mount height to help estimate distances - objects at ground level will a
459
517
  `;
460
518
 
461
519
  // Build multimodal message with provider-specific image format
462
- const result = await llm.getChatCompletion({
520
+ const result = await llm.getChatCompletion({
463
521
  messages: [
464
522
  {
465
523
  role: 'user',
@@ -473,91 +531,119 @@ Use the mount height to help estimate distances - objects at ground level will a
473
531
  temperature: 0.3,
474
532
  });
475
533
 
476
- const content = result?.choices?.[0]?.message?.content;
477
- if (content && typeof content === 'string') {
478
- try {
479
- // Extract JSON from response (handle markdown code blocks)
480
- let jsonStr = content.trim();
481
- if (jsonStr.startsWith('```')) {
482
- jsonStr = jsonStr.replace(/```json?\n?/g, '').replace(/```$/g, '').trim();
483
- }
534
+ const content = result?.choices?.[0]?.message?.content;
535
+ if (content && typeof content === 'string') {
536
+ try {
537
+ // Extract JSON from response (handle markdown code blocks)
538
+ let jsonStr = content.trim();
539
+ if (jsonStr.startsWith('```')) {
540
+ jsonStr = jsonStr.replace(/```json?\n?/g, '').replace(/```$/g, '').trim();
541
+ }
484
542
 
485
- // Try to recover truncated JSON
486
- const parsed = this.parseJsonWithRecovery(jsonStr, cameraName);
543
+ // Try to recover truncated JSON
544
+ const parsed = this.parseJsonWithRecovery(jsonStr, cameraName);
545
+
546
+ // Map parsed data to our types
547
+ if (Array.isArray(parsed.landmarks)) {
548
+ analysis.landmarks = parsed.landmarks.map((l: any) => ({
549
+ name: l.name || 'Unknown',
550
+ type: this.mapLandmarkType(l.type),
551
+ confidence: typeof l.confidence === 'number' ? l.confidence : 0.7,
552
+ distance: this.mapDistance(l.distance),
553
+ description: l.description || '',
554
+ boundingBox: l.boundingBox,
555
+ }));
556
+ }
487
557
 
488
- // Map parsed data to our types
489
- if (Array.isArray(parsed.landmarks)) {
490
- analysis.landmarks = parsed.landmarks.map((l: any) => ({
491
- name: l.name || 'Unknown',
492
- type: this.mapLandmarkType(l.type),
493
- confidence: typeof l.confidence === 'number' ? l.confidence : 0.7,
494
- distance: this.mapDistance(l.distance),
495
- description: l.description || '',
496
- boundingBox: l.boundingBox,
497
- }));
498
- }
558
+ if (Array.isArray(parsed.zones)) {
559
+ analysis.zones = parsed.zones.map((z: any) => ({
560
+ name: z.name || 'Unknown',
561
+ type: this.mapZoneType(z.type),
562
+ coverage: typeof z.coverage === 'number' ? z.coverage : 0.5,
563
+ description: z.description || '',
564
+ boundingBox: z.boundingBox,
565
+ distance: this.mapDistance(z.distance), // Parse distance for zones too
566
+ } as DiscoveredZone & { distance?: DistanceEstimate }));
567
+ }
499
568
 
500
- if (Array.isArray(parsed.zones)) {
501
- analysis.zones = parsed.zones.map((z: any) => ({
502
- name: z.name || 'Unknown',
503
- type: this.mapZoneType(z.type),
504
- coverage: typeof z.coverage === 'number' ? z.coverage : 0.5,
505
- description: z.description || '',
506
- boundingBox: z.boundingBox,
507
- distance: this.mapDistance(z.distance), // Parse distance for zones too
508
- } as DiscoveredZone & { distance?: DistanceEstimate }));
509
- }
569
+ if (parsed.edges && typeof parsed.edges === 'object') {
570
+ analysis.edges = {
571
+ top: parsed.edges.top || '',
572
+ left: parsed.edges.left || '',
573
+ right: parsed.edges.right || '',
574
+ bottom: parsed.edges.bottom || '',
575
+ };
576
+ }
510
577
 
511
- if (parsed.edges && typeof parsed.edges === 'object') {
512
- analysis.edges = {
513
- top: parsed.edges.top || '',
514
- left: parsed.edges.left || '',
515
- right: parsed.edges.right || '',
516
- bottom: parsed.edges.bottom || '',
517
- };
518
- }
578
+ if (parsed.orientation) {
579
+ analysis.orientation = this.mapOrientation(parsed.orientation);
580
+ }
519
581
 
520
- if (parsed.orientation) {
521
- analysis.orientation = this.mapOrientation(parsed.orientation);
522
- }
582
+ analysis.isValid = true;
583
+ this.console.log(`[Discovery] Analyzed ${cameraName}: ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones (using ${formatType} format)`);
523
584
 
524
- analysis.isValid = true;
525
- this.console.log(`[Discovery] Analyzed ${cameraName}: ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones (using ${formatType} format)`);
585
+ // Update the preferred format for future requests
586
+ if (formatType !== this.llmProviderType) {
587
+ this.console.log(`[Discovery] Switching to ${formatType} format for future requests`);
588
+ this.llmProviderType = formatType;
589
+ }
526
590
 
527
- // Update the preferred format for future requests
528
- if (formatType !== this.llmProviderType) {
529
- this.console.log(`[Discovery] Switching to ${formatType} format for future requests`);
530
- this.llmProviderType = formatType;
591
+ // Success - exit the retry loop
592
+ return analysis;
593
+ } catch (parseError) {
594
+ this.console.warn(`[Discovery] Failed to parse LLM response for ${cameraName}:`, parseError);
595
+ analysis.error = 'Failed to parse LLM response';
596
+ return analysis;
531
597
  }
598
+ }
599
+ } catch (e) {
600
+ lastError = e;
601
+
602
+ // Check if this is a vision/multimodal format error
603
+ if (isVisionFormatError(e)) {
604
+ this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
605
+ visionFormatFailures++;
606
+ continue; // Try next format
607
+ }
532
608
 
533
- // Success - exit the retry loop
534
- return analysis;
535
- } catch (parseError) {
536
- this.console.warn(`[Discovery] Failed to parse LLM response for ${cameraName}:`, parseError);
537
- analysis.error = 'Failed to parse LLM response';
538
- return analysis;
609
+ // Retry with a different LLM if error indicates bad endpoint/model
610
+ if (this.isRetryableLlmError(e)) {
611
+ this.console.warn(`[Discovery] LLM error for ${cameraName}, trying another provider...`);
612
+ this.markLlmError(llm);
613
+ const llmEntry = this.llmDevices.find(d => d.device === llm);
614
+ if (llmEntry) {
615
+ excludeIds.add(llmEntry.id);
616
+ }
617
+ break;
539
618
  }
540
- }
541
- } catch (e) {
542
- lastError = e;
543
619
 
544
- // Check if this is a vision/multimodal format error
545
- if (isVisionFormatError(e)) {
546
- this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
547
- continue; // Try next format
620
+ // Not a format error - don't retry
621
+ this.console.warn(`[Discovery] Scene analysis failed for ${cameraName}:`, e);
622
+ break;
548
623
  }
624
+ }
549
625
 
550
- // Not a format error - don't retry
551
- this.console.warn(`[Discovery] Scene analysis failed for ${cameraName}:`, e);
552
- break;
626
+ allFormatsVisionError = visionFormatFailures > 0 && visionFormatFailures === formatsToTry.length;
627
+ if (allFormatsVisionError) {
628
+ const llmEntry = this.llmDevices.find(d => d.device === llm);
629
+ if (llmEntry) {
630
+ llmEntry.visionCapable = false;
631
+ excludeIds.add(llmEntry.id);
632
+ this.console.warn(`[Discovery] ${llmEntry.name} does not support vision. Excluding from discovery.`);
633
+ }
553
634
  }
554
635
  }
555
636
 
556
637
  // All formats failed
557
638
  if (lastError) {
558
639
  // Track error for load balancing
559
- if (llm) {
560
- this.markLlmError(llm);
640
+ // Note: llm may be null here if no device was available
641
+ if (lastError && !this.isRetryableLlmError(lastError)) {
642
+ // Best-effort error accounting for the most recent device
643
+ const lastDevice = this.llmDevice;
644
+ if (lastDevice) {
645
+ this.markLlmError(lastDevice);
646
+ }
561
647
  }
562
648
 
563
649
  const errorStr = String(lastError);
package/src/main.ts CHANGED
@@ -6,6 +6,7 @@ import sdk, {
6
6
  Setting,
7
7
  SettingValue,
8
8
  ScryptedDeviceBase,
9
+ ScryptedDevice,
9
10
  ScryptedDeviceType,
10
11
  ScryptedInterface,
11
12
  ScryptedNativeId,
@@ -764,6 +765,20 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
764
765
  // ==================== 8. Auto-Topology Discovery ====================
765
766
  addGroup('Auto-Topology Discovery');
766
767
 
768
+ if (this.discoveryEngine) {
769
+ const excluded = this.discoveryEngine.getExcludedVisionLlmNames();
770
+ if (excluded.length > 0) {
771
+ settings.push({
772
+ key: 'excludedVisionLlms',
773
+ title: 'Excluded LLMs (No Vision)',
774
+ type: 'string',
775
+ readonly: true,
776
+ value: excluded.join(', '),
777
+ group: 'Auto-Topology Discovery',
778
+ });
779
+ }
780
+ }
781
+
767
782
  // ==================== 9. MQTT Integration ====================
768
783
  addGroup('MQTT Integration');
769
784
 
@@ -781,6 +796,7 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
781
796
  key === 'lostTimeout' ||
782
797
  key === 'useVisualMatching' ||
783
798
  key === 'loiteringThreshold' ||
799
+ key === 'minDetectionScore' ||
784
800
  key === 'objectAlertCooldown' ||
785
801
  key === 'useLlmDescriptions' ||
786
802
  key === 'llmDebounceInterval' ||
@@ -916,7 +932,7 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
916
932
 
917
933
  // Training Mode endpoints
918
934
  if (path.endsWith('/api/training/start')) {
919
- return this.handleTrainingStartRequest(request, response);
935
+ return await this.handleTrainingStartRequest(request, response);
920
936
  }
921
937
  if (path.endsWith('/api/training/pause')) {
922
938
  return this.handleTrainingPauseRequest(response);
@@ -1510,13 +1526,25 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
1510
1526
 
1511
1527
  // ==================== Training Mode Handlers ====================
1512
1528
 
1513
- private handleTrainingStartRequest(request: HttpRequest, response: HttpResponse): void {
1529
+ private async handleTrainingStartRequest(request: HttpRequest, response: HttpResponse): Promise<void> {
1514
1530
  if (!this.trackingEngine) {
1515
- response.send(JSON.stringify({ error: 'Tracking engine not running. Configure topology first.' }), {
1516
- code: 500,
1517
- headers: { 'Content-Type': 'application/json' },
1518
- });
1519
- return;
1531
+ const topologyJson = this.storage.getItem('topology');
1532
+ const topology = topologyJson ? JSON.parse(topologyJson) as CameraTopology : createEmptyTopology();
1533
+
1534
+ if (!topology.cameras?.length) {
1535
+ const cameras = this.buildTopologyCamerasFromSettings();
1536
+ if (cameras.length === 0) {
1537
+ response.send(JSON.stringify({ error: 'No cameras configured. Select tracked cameras first.' }), {
1538
+ code: 400,
1539
+ headers: { 'Content-Type': 'application/json' },
1540
+ });
1541
+ return;
1542
+ }
1543
+ topology.cameras = cameras;
1544
+ this.storage.setItem('topology', JSON.stringify(topology));
1545
+ }
1546
+
1547
+ await this.startTrackingEngine(topology);
1520
1548
  }
1521
1549
 
1522
1550
  try {
@@ -2324,6 +2352,27 @@ Access the visual topology editor at \`/ui/editor\` to configure camera relation
2324
2352
  const topologyJson = this.storage.getItem('topology');
2325
2353
  return topologyJson ? JSON.parse(topologyJson) : null;
2326
2354
  }
2355
+
2356
+ private buildTopologyCamerasFromSettings(): CameraTopology['cameras'] {
2357
+ const value = this.storageSettings.values.trackedCameras;
2358
+ const cameraIds = Array.isArray(value)
2359
+ ? value.filter(Boolean)
2360
+ : typeof value === 'string' && value.length
2361
+ ? [value]
2362
+ : [];
2363
+
2364
+ return cameraIds.map((deviceId: string) => {
2365
+ const device = systemManager.getDeviceById<ScryptedDevice>(deviceId);
2366
+ return {
2367
+ deviceId,
2368
+ nativeId: device?.nativeId || deviceId,
2369
+ name: device?.name || deviceId,
2370
+ isEntryPoint: false,
2371
+ isExitPoint: false,
2372
+ trackClasses: [],
2373
+ };
2374
+ });
2375
+ }
2327
2376
  }
2328
2377
 
2329
2378
  export default SpatialAwarenessPlugin;