@blueharford/scrypted-spatial-awareness 0.6.33 → 0.6.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/README.md +2 -0
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +237 -119
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/topology-discovery.ts +181 -95
- package/src/main.ts +56 -7
package/out/plugin.zip
CHANGED
Binary file
package/src/core/topology-discovery.ts
CHANGED
@@ -231,6 +231,13 @@ export class TopologyDiscoveryEngine {
     return { ...this.status };
   }
 
+  /** Get list of LLMs excluded for lack of vision support */
+  getExcludedVisionLlmNames(): string[] {
+    return this.llmDevices
+      .filter(l => !l.visionCapable)
+      .map(l => l.name || l.id);
+  }
+
   /** Get pending suggestions */
   getPendingSuggestions(): DiscoverySuggestion[] {
     return Array.from(this.suggestions.values())
@@ -262,6 +269,7 @@ export class TopologyDiscoveryEngine {
     providerType: LlmProvider;
     lastUsed: number;
     errorCount: number;
+    visionCapable: boolean;
   }> = [];
 
   /** Find ALL LLM devices for load balancing */
@@ -294,6 +302,7 @@ export class TopologyDiscoveryEngine {
       providerType,
       lastUsed: 0,
       errorCount: 0,
+      visionCapable: true,
     });
 
     this.console.log(`[Discovery] Found LLM: ${device.name}`);
@@ -348,6 +357,48 @@ export class TopologyDiscoveryEngine {
     return selected.device;
   }
 
+  /** Select an LLM device, excluding any IDs if provided */
+  private async selectLlmDevice(excludeIds: Set<string>): Promise<ChatCompletionDevice | null> {
+    await this.findAllLlmDevices();
+
+    if (this.llmDevices.length === 0) return null;
+
+    let bestIndex = -1;
+    let bestScore = Infinity;
+
+    for (let i = 0; i < this.llmDevices.length; i++) {
+      const llm = this.llmDevices[i];
+      if (excludeIds.has(llm.id)) continue;
+      if (!llm.visionCapable) continue;
+      const score = llm.lastUsed + (llm.errorCount * 60000);
+      if (score < bestScore) {
+        bestScore = score;
+        bestIndex = i;
+      }
+    }
+
+    if (bestIndex === -1) return null;
+
+    const selected = this.llmDevices[bestIndex];
+    this.llmDevice = selected.device;
+    this.llmProviderType = selected.providerType;
+    selected.lastUsed = Date.now();
+
+    this.console.log(`[Discovery] Selected LLM: ${selected.name}`);
+    return selected.device;
+  }
+
+  private isRetryableLlmError(error: any): boolean {
+    const errorStr = String(error).toLowerCase();
+    return (
+      errorStr.includes('404') ||
+      errorStr.includes('not found') ||
+      errorStr.includes('no such model') ||
+      errorStr.includes('model not found') ||
+      errorStr.includes('endpoint')
+    );
+  }
+
   /** Mark an LLM as having an error */
   private markLlmError(device: ChatCompletionDevice): void {
     const llm = this.llmDevices.find(l => l.device === device);
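For context, the selection heuristic in selectLlmDevice above favors the least recently used device and defers devices with prior failures by roughly one minute per recorded error. A minimal standalone sketch of that scoring, assuming an illustrative candidate shape rather than the plugin's actual types:

// Illustrative sketch only: lower score wins; each prior error adds a
// one-minute (60000 ms) penalty on top of the last-used timestamp.
interface LlmCandidate {
  id: string;
  name: string;
  lastUsed: number;      // epoch millis of the last time this device was picked
  errorCount: number;    // failures recorded via markLlmError
  visionCapable: boolean;
}

function pickLlm(candidates: LlmCandidate[], excludeIds: Set<string>): LlmCandidate | null {
  let best: LlmCandidate | null = null;
  let bestScore = Infinity;
  for (const c of candidates) {
    // Skip devices already ruled out or known to lack vision support.
    if (excludeIds.has(c.id) || !c.visionCapable) continue;
    const score = c.lastUsed + c.errorCount * 60000;
    if (score < bestScore) {
      bestScore = score;
      best = c;
    }
  }
  return best;
}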
@@ -406,42 +457,49 @@
       isValid: false,
     };
 
-    const llm = await this.findLlmDevice();
-    if (!llm?.getChatCompletion) {
-      analysis.error = 'No LLM device available';
-      return analysis;
-    }
-
     const imageData = await this.getCameraSnapshot(cameraId);
     if (!imageData) {
       analysis.error = 'Failed to capture camera snapshot';
       return analysis;
     }
 
-
-
-    const formatsToTry: LlmProvider[] = [];
-
-    // Start with detected format
-    formatsToTry.push(this.llmProviderType);
-
-    // Add fallbacks based on detected provider
-    if (this.llmProviderType === 'openai') {
-      formatsToTry.push('scrypted', 'anthropic');
-    } else if (this.llmProviderType === 'anthropic') {
-      formatsToTry.push('scrypted', 'openai');
-    } else if (this.llmProviderType === 'scrypted') {
-      formatsToTry.push('anthropic', 'openai');
-    } else {
-      // Unknown - try all formats
-      formatsToTry.push('scrypted', 'anthropic', 'openai');
-    }
-
+    await this.findAllLlmDevices();
+    const excludeIds = new Set<string>();
     let lastError: any = null;
+    const maxAttempts = Math.max(1, this.llmDevices.length || 1);
 
-    for (
-
-
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      const llm = await this.selectLlmDevice(excludeIds);
+      if (!llm?.getChatCompletion) {
+        analysis.error = 'No LLM device available';
+        return analysis;
+      }
+
+      let allFormatsVisionError = false;
+
+      // Try with detected provider format first, then fallback to alternates
+      // The order matters: try the most likely formats first
+      const formatsToTry: LlmProvider[] = [];
+
+      // Start with detected format
+      formatsToTry.push(this.llmProviderType);
+
+      // Add fallbacks based on detected provider
+      if (this.llmProviderType === 'openai') {
+        formatsToTry.push('scrypted', 'anthropic');
+      } else if (this.llmProviderType === 'anthropic') {
+        formatsToTry.push('scrypted', 'openai');
+      } else if (this.llmProviderType === 'scrypted') {
+        formatsToTry.push('anthropic', 'openai');
+      } else {
+        // Unknown - try all formats
+        formatsToTry.push('scrypted', 'anthropic', 'openai');
+      }
+
+      let visionFormatFailures = 0;
+      for (const formatType of formatsToTry) {
+        try {
+          this.console.log(`[Discovery] Trying ${formatType} image format for ${cameraName}...`);
 
           // Build prompt with camera context (height)
           const cameraNode = this.topology ? findCamera(this.topology, cameraId) : null;
@@ -459,7 +517,7 @@ Use the mount height to help estimate distances - objects at ground level will a
 `;
 
           // Build multimodal message with provider-specific image format
-
+          const result = await llm.getChatCompletion({
             messages: [
               {
                 role: 'user',
@@ -473,91 +531,119 @@ Use the mount height to help estimate distances - objects at ground level will a
             temperature: 0.3,
           });
 
-
-
-
-
-
-
-
-
+          const content = result?.choices?.[0]?.message?.content;
+          if (content && typeof content === 'string') {
+            try {
+              // Extract JSON from response (handle markdown code blocks)
+              let jsonStr = content.trim();
+              if (jsonStr.startsWith('```')) {
+                jsonStr = jsonStr.replace(/```json?\n?/g, '').replace(/```$/g, '').trim();
+              }
 
-
-
+              // Try to recover truncated JSON
+              const parsed = this.parseJsonWithRecovery(jsonStr, cameraName);
+
+              // Map parsed data to our types
+              if (Array.isArray(parsed.landmarks)) {
+                analysis.landmarks = parsed.landmarks.map((l: any) => ({
+                  name: l.name || 'Unknown',
+                  type: this.mapLandmarkType(l.type),
+                  confidence: typeof l.confidence === 'number' ? l.confidence : 0.7,
+                  distance: this.mapDistance(l.distance),
+                  description: l.description || '',
+                  boundingBox: l.boundingBox,
+                }));
+              }
 
-
-
-
-
-
-
-
-
-
-              }
-            }
+              if (Array.isArray(parsed.zones)) {
+                analysis.zones = parsed.zones.map((z: any) => ({
+                  name: z.name || 'Unknown',
+                  type: this.mapZoneType(z.type),
+                  coverage: typeof z.coverage === 'number' ? z.coverage : 0.5,
+                  description: z.description || '',
+                  boundingBox: z.boundingBox,
+                  distance: this.mapDistance(z.distance), // Parse distance for zones too
+                } as DiscoveredZone & { distance?: DistanceEstimate }));
+              }
 
-
-
-
-
-
-
-
-
-                } as DiscoveredZone & { distance?: DistanceEstimate }));
-              }
+              if (parsed.edges && typeof parsed.edges === 'object') {
+                analysis.edges = {
+                  top: parsed.edges.top || '',
+                  left: parsed.edges.left || '',
+                  right: parsed.edges.right || '',
+                  bottom: parsed.edges.bottom || '',
+                };
+              }
 
-
-
-
-                  left: parsed.edges.left || '',
-                  right: parsed.edges.right || '',
-                  bottom: parsed.edges.bottom || '',
-                };
-              }
+              if (parsed.orientation) {
+                analysis.orientation = this.mapOrientation(parsed.orientation);
+              }
 
-
-              analysis.
-            }
+              analysis.isValid = true;
+              this.console.log(`[Discovery] Analyzed ${cameraName}: ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones (using ${formatType} format)`);
 
-
-
+              // Update the preferred format for future requests
+              if (formatType !== this.llmProviderType) {
+                this.console.log(`[Discovery] Switching to ${formatType} format for future requests`);
+                this.llmProviderType = formatType;
+              }
 
-
-
-
-              this.
+              // Success - exit the retry loop
+              return analysis;
+            } catch (parseError) {
+              this.console.warn(`[Discovery] Failed to parse LLM response for ${cameraName}:`, parseError);
+              analysis.error = 'Failed to parse LLM response';
+              return analysis;
             }
+          }
+        } catch (e) {
+          lastError = e;
+
+          // Check if this is a vision/multimodal format error
+          if (isVisionFormatError(e)) {
+            this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
+            visionFormatFailures++;
+            continue; // Try next format
+          }
 
-
-
-
-          this.
-
-
+          // Retry with a different LLM if error indicates bad endpoint/model
+          if (this.isRetryableLlmError(e)) {
+            this.console.warn(`[Discovery] LLM error for ${cameraName}, trying another provider...`);
+            this.markLlmError(llm);
+            const llmEntry = this.llmDevices.find(d => d.device === llm);
+            if (llmEntry) {
+              excludeIds.add(llmEntry.id);
+            }
+            break;
           }
-        }
-      } catch (e) {
-        lastError = e;
 
-
-
-        continue; // Try next format
+          // Not a format error - don't retry
+          this.console.warn(`[Discovery] Scene analysis failed for ${cameraName}:`, e);
+          break;
         }
+      }
 
-
-
+      allFormatsVisionError = visionFormatFailures > 0 && visionFormatFailures === formatsToTry.length;
+      if (allFormatsVisionError) {
+        const llmEntry = this.llmDevices.find(d => d.device === llm);
+        if (llmEntry) {
+          llmEntry.visionCapable = false;
+          excludeIds.add(llmEntry.id);
+          this.console.warn(`[Discovery] ${llmEntry.name} does not support vision. Excluding from discovery.`);
+        }
       }
     }
 
     // All formats failed
     if (lastError) {
       // Track error for load balancing
-
-
+      // Note: llm may be null here if no device was available
+      if (lastError && !this.isRetryableLlmError(lastError)) {
+        // Best-effort error accounting for the most recent device
+        const lastDevice = this.llmDevice;
+        if (lastDevice) {
+          this.markLlmError(lastDevice);
+        }
       }
 
       const errorStr = String(lastError);
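Taken together, the analyzeScene changes above form a two-level retry: an outer loop rotates across LLM devices (excluding ones that hit model/endpoint errors or fail every image format) while an inner loop tries each image encoding in turn. A simplified standalone sketch of that control flow, assuming placeholder types and plain string checks in place of the plugin's isVisionFormatError and isRetryableLlmError helpers:

// Illustrative sketch of the retry/fallback pattern; not the plugin's actual code.
type ImageFormat = 'scrypted' | 'anthropic' | 'openai';

interface SceneLlm {
  id: string;
  complete(format: ImageFormat): Promise<string>;
}

async function analyzeWithFallback(
  pick: (exclude: Set<string>) => SceneLlm | null,
  formats: ImageFormat[],
): Promise<string | null> {
  const exclude = new Set<string>();
  for (let attempt = 0; attempt < 5; attempt++) {   // cap attempts, like maxAttempts above
    const llm = pick(exclude);
    if (!llm) return null;                          // no usable device left
    let formatFailures = 0;
    for (const format of formats) {
      try {
        return await llm.complete(format);          // success ends both loops
      } catch (e) {
        const msg = String(e).toLowerCase();
        // Assumption: a format error mentions 'image'/'multimodal'; the plugin
        // uses its own isVisionFormatError helper instead.
        if (msg.includes('image') || msg.includes('multimodal')) {
          formatFailures++;                         // wrong image encoding: try next format
          continue;
        }
        if (msg.includes('404') || msg.includes('not found')) {
          exclude.add(llm.id);                      // bad model/endpoint: try another device
          break;
        }
        return null;                                // unrelated error: give up
      }
    }
    if (formatFailures === formats.length)
      exclude.add(llm.id);                          // device cannot handle vision at all
  }
  return null;
}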
package/src/main.ts
CHANGED
@@ -6,6 +6,7 @@ import sdk, {
   Setting,
   SettingValue,
   ScryptedDeviceBase,
+  ScryptedDevice,
   ScryptedDeviceType,
   ScryptedInterface,
   ScryptedNativeId,
@@ -764,6 +765,20 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
     // ==================== 8. Auto-Topology Discovery ====================
     addGroup('Auto-Topology Discovery');
 
+    if (this.discoveryEngine) {
+      const excluded = this.discoveryEngine.getExcludedVisionLlmNames();
+      if (excluded.length > 0) {
+        settings.push({
+          key: 'excludedVisionLlms',
+          title: 'Excluded LLMs (No Vision)',
+          type: 'string',
+          readonly: true,
+          value: excluded.join(', '),
+          group: 'Auto-Topology Discovery',
+        });
+      }
+    }
+
     // ==================== 9. MQTT Integration ====================
     addGroup('MQTT Integration');
 
@@ -781,6 +796,7 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
       key === 'lostTimeout' ||
       key === 'useVisualMatching' ||
       key === 'loiteringThreshold' ||
+      key === 'minDetectionScore' ||
       key === 'objectAlertCooldown' ||
       key === 'useLlmDescriptions' ||
       key === 'llmDebounceInterval' ||
@@ -916,7 +932,7 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
 
     // Training Mode endpoints
     if (path.endsWith('/api/training/start')) {
-      return this.handleTrainingStartRequest(request, response);
+      return await this.handleTrainingStartRequest(request, response);
     }
     if (path.endsWith('/api/training/pause')) {
       return this.handleTrainingPauseRequest(response);
@@ -1510,13 +1526,25 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
 
   // ==================== Training Mode Handlers ====================
 
-  private handleTrainingStartRequest(request: HttpRequest, response: HttpResponse): void {
+  private async handleTrainingStartRequest(request: HttpRequest, response: HttpResponse): Promise<void> {
     if (!this.trackingEngine) {
-
-
-
-
-
+      const topologyJson = this.storage.getItem('topology');
+      const topology = topologyJson ? JSON.parse(topologyJson) as CameraTopology : createEmptyTopology();
+
+      if (!topology.cameras?.length) {
+        const cameras = this.buildTopologyCamerasFromSettings();
+        if (cameras.length === 0) {
+          response.send(JSON.stringify({ error: 'No cameras configured. Select tracked cameras first.' }), {
+            code: 400,
+            headers: { 'Content-Type': 'application/json' },
+          });
+          return;
+        }
+        topology.cameras = cameras;
+        this.storage.setItem('topology', JSON.stringify(topology));
+      }
+
+      await this.startTrackingEngine(topology);
     }
 
     try {
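The reworked handleTrainingStartRequest now answers with HTTP 400 and a JSON error body when no cameras are configured. A hypothetical client-side call showing how a UI might surface that error; the fetch wrapper, base URL, and HTTP method are assumptions, and only the path and error payload shape come from the diff above:

// Hypothetical caller of the training-start endpoint.
async function startTraining(baseUrl: string): Promise<void> {
  // HTTP method is an assumption; the diff only shows the path check.
  const res = await fetch(`${baseUrl}/api/training/start`, { method: 'POST' });
  if (res.status === 400) {
    const body = await res.json() as { error?: string };
    throw new Error(body.error ?? 'Training could not start');
  }
}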
@@ -2324,6 +2352,27 @@ Access the visual topology editor at \`/ui/editor\` to configure camera relation
     const topologyJson = this.storage.getItem('topology');
     return topologyJson ? JSON.parse(topologyJson) : null;
   }
+
+  private buildTopologyCamerasFromSettings(): CameraTopology['cameras'] {
+    const value = this.storageSettings.values.trackedCameras;
+    const cameraIds = Array.isArray(value)
+      ? value.filter(Boolean)
+      : typeof value === 'string' && value.length
+        ? [value]
+        : [];
+
+    return cameraIds.map((deviceId: string) => {
+      const device = systemManager.getDeviceById<ScryptedDevice>(deviceId);
+      return {
+        deviceId,
+        nativeId: device?.nativeId || deviceId,
+        name: device?.name || deviceId,
+        isEntryPoint: false,
+        isExitPoint: false,
+        trackClasses: [],
+      };
+    });
+  }
 }
 
 export default SpatialAwarenessPlugin;
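For reference, buildTopologyCamerasFromSettings accepts either an array of device ids or a single id string from the trackedCameras setting. A standalone sketch of just that normalization step, with made-up ids and the device lookup omitted:

// Hypothetical standalone version of the id normalization used above.
function normalizeTrackedCameras(value: unknown): string[] {
  if (Array.isArray(value)) return value.filter(Boolean) as string[];
  if (typeof value === 'string' && value.length) return [value];
  return [];
}

console.log(normalizeTrackedCameras(['cam-1', '', 'cam-2'])); // ['cam-1', 'cam-2']
console.log(normalizeTrackedCameras('cam-1'));                // ['cam-1']
console.log(normalizeTrackedCameras(undefined));              // []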