@blueharford/scrypted-spatial-awareness 0.6.6 → 0.6.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +185 -12
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +147 -11
- package/src/core/tracking-engine.ts +34 -6
- package/src/main.ts +39 -1
package/out/plugin.zip
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -545,10 +545,11 @@ export class SpatialReasoningEngine {
|
|
|
545
545
|
}
|
|
546
546
|
|
|
547
547
|
/** Generate entry description when object enters property */
|
|
548
|
-
generateEntryDescription(
|
|
548
|
+
async generateEntryDescription(
|
|
549
549
|
tracked: TrackedObject,
|
|
550
|
-
cameraId: string
|
|
551
|
-
|
|
550
|
+
cameraId: string,
|
|
551
|
+
mediaObject?: MediaObject
|
|
552
|
+
): Promise<SpatialReasoningResult> {
|
|
552
553
|
if (!this.topology) {
|
|
553
554
|
return {
|
|
554
555
|
description: `${this.capitalizeFirst(tracked.className)} entered property`,
|
|
@@ -571,11 +572,10 @@ export class SpatialReasoningEngine {
|
|
|
571
572
|
const landmarks = getLandmarksVisibleFromCamera(this.topology, cameraId);
|
|
572
573
|
const objectType = this.capitalizeFirst(tracked.className);
|
|
573
574
|
|
|
574
|
-
// Build entry description using topology context
|
|
575
|
+
// Build basic entry description using topology context
|
|
575
576
|
const location = this.describeLocation(camera, landmarks, 'to');
|
|
576
577
|
|
|
577
578
|
// Check if we can determine where they came from (e.g., street, neighbor)
|
|
578
|
-
const entryLandmark = landmarks.find(l => l.isEntryPoint);
|
|
579
579
|
const streetLandmark = landmarks.find(l => l.type === 'street');
|
|
580
580
|
const neighborLandmark = landmarks.find(l => l.type === 'neighbor');
|
|
581
581
|
|
|
@@ -586,8 +586,25 @@ export class SpatialReasoningEngine {
|
|
|
586
586
|
source = ` from ${neighborLandmark.name}`;
|
|
587
587
|
}
|
|
588
588
|
|
|
589
|
+
const basicDescription = `${objectType} arrived at ${location}${source}`;
|
|
590
|
+
|
|
591
|
+
// Try LLM for enhanced description with visual details
|
|
592
|
+
if (this.config.enableLlm && mediaObject) {
|
|
593
|
+
const llmDescription = await this.getLlmEntryExitDescription(
|
|
594
|
+
tracked, camera, landmarks, 'entry', mediaObject
|
|
595
|
+
);
|
|
596
|
+
if (llmDescription) {
|
|
597
|
+
return {
|
|
598
|
+
description: llmDescription,
|
|
599
|
+
involvedLandmarks: landmarks,
|
|
600
|
+
confidence: 0.9,
|
|
601
|
+
usedLlm: true,
|
|
602
|
+
};
|
|
603
|
+
}
|
|
604
|
+
}
|
|
605
|
+
|
|
589
606
|
return {
|
|
590
|
-
description:
|
|
607
|
+
description: basicDescription,
|
|
591
608
|
involvedLandmarks: landmarks,
|
|
592
609
|
confidence: 0.8,
|
|
593
610
|
usedLlm: false,
|
|
@@ -595,10 +612,11 @@ export class SpatialReasoningEngine {
|
|
|
595
612
|
}
|
|
596
613
|
|
|
597
614
|
/** Generate exit description when object leaves property */
|
|
598
|
-
generateExitDescription(
|
|
615
|
+
async generateExitDescription(
|
|
599
616
|
tracked: TrackedObject,
|
|
600
|
-
cameraId: string
|
|
601
|
-
|
|
617
|
+
cameraId: string,
|
|
618
|
+
mediaObject?: MediaObject
|
|
619
|
+
): Promise<SpatialReasoningResult> {
|
|
602
620
|
if (!this.topology) {
|
|
603
621
|
return {
|
|
604
622
|
description: `${this.capitalizeFirst(tracked.className)} left property`,
|
|
@@ -621,7 +639,7 @@ export class SpatialReasoningEngine {
|
|
|
621
639
|
const landmarks = getLandmarksVisibleFromCamera(this.topology, cameraId);
|
|
622
640
|
const objectType = this.capitalizeFirst(tracked.className);
|
|
623
641
|
|
|
624
|
-
// Build exit description
|
|
642
|
+
// Build basic exit description
|
|
625
643
|
const location = this.describeLocation(camera, landmarks, 'from');
|
|
626
644
|
|
|
627
645
|
// Check for exit point landmarks
|
|
@@ -674,8 +692,25 @@ export class SpatialReasoningEngine {
|
|
|
674
692
|
}
|
|
675
693
|
}
|
|
676
694
|
|
|
695
|
+
const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
|
|
696
|
+
|
|
697
|
+
// Try LLM for enhanced description with visual details
|
|
698
|
+
if (this.config.enableLlm && mediaObject) {
|
|
699
|
+
const llmDescription = await this.getLlmEntryExitDescription(
|
|
700
|
+
tracked, camera, landmarks, 'exit', mediaObject, journeyContext
|
|
701
|
+
);
|
|
702
|
+
if (llmDescription) {
|
|
703
|
+
return {
|
|
704
|
+
description: llmDescription,
|
|
705
|
+
involvedLandmarks: landmarks,
|
|
706
|
+
confidence: 0.9,
|
|
707
|
+
usedLlm: true,
|
|
708
|
+
};
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
|
|
677
712
|
return {
|
|
678
|
-
description:
|
|
713
|
+
description: basicDescription,
|
|
679
714
|
involvedLandmarks: landmarks,
|
|
680
715
|
confidence: 0.8,
|
|
681
716
|
usedLlm: false,
|
|
@@ -952,6 +987,107 @@ export class SpatialReasoningEngine {
|
|
|
952
987
|
}
|
|
953
988
|
}
|
|
954
989
|
|
|
990
|
+
/** Get LLM-enhanced description for entry/exit events */
|
|
991
|
+
private async getLlmEntryExitDescription(
|
|
992
|
+
tracked: TrackedObject,
|
|
993
|
+
camera: CameraNode,
|
|
994
|
+
landmarks: Landmark[],
|
|
995
|
+
eventType: 'entry' | 'exit',
|
|
996
|
+
mediaObject: MediaObject,
|
|
997
|
+
journeyContext?: string
|
|
998
|
+
): Promise<string | null> {
|
|
999
|
+
const llm = await this.findLlmDevice();
|
|
1000
|
+
if (!llm || !llm.getChatCompletion) return null;
|
|
1001
|
+
|
|
1002
|
+
try {
|
|
1003
|
+
// Convert image to base64 for vision LLM
|
|
1004
|
+
const imageData = await mediaObjectToBase64(mediaObject);
|
|
1005
|
+
|
|
1006
|
+
const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
|
|
1007
|
+
const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
|
|
1008
|
+
|
|
1009
|
+
// Build context-aware prompt
|
|
1010
|
+
const prompt = eventType === 'entry'
|
|
1011
|
+
? `You are a security camera system. Analyze this image and describe who/what just arrived.
|
|
1012
|
+
|
|
1013
|
+
CONTEXT:
|
|
1014
|
+
- Camera: ${camera.name}
|
|
1015
|
+
- Object type: ${tracked.className}
|
|
1016
|
+
- Nearby landmarks: ${landmarkNames}
|
|
1017
|
+
|
|
1018
|
+
INSTRUCTIONS:
|
|
1019
|
+
Look at the image and generate a single, natural sentence describing:
|
|
1020
|
+
1. Physical description (if person: gender, clothing, items carried; if vehicle: color, type, make)
|
|
1021
|
+
2. What they appear to be doing (arriving, approaching, etc.)
|
|
1022
|
+
3. Relevant landmark context (driveway, front door, mailbox, etc.)
|
|
1023
|
+
|
|
1024
|
+
Examples of good descriptions:
|
|
1025
|
+
- "Man in gray hoodie approaching the front door"
|
|
1026
|
+
- "Woman in scrubs arriving with shopping bags"
|
|
1027
|
+
- "White delivery van pulling into the driveway"
|
|
1028
|
+
- "UPS driver carrying package towards the porch"
|
|
1029
|
+
- "Teenager on bicycle coming up the driveway"
|
|
1030
|
+
|
|
1031
|
+
Generate ONLY the description, nothing else:`
|
|
1032
|
+
: `You are a security camera system. Analyze this image and describe who/what is leaving.
|
|
1033
|
+
|
|
1034
|
+
CONTEXT:
|
|
1035
|
+
- Camera: ${camera.name}
|
|
1036
|
+
- Object type: ${tracked.className}
|
|
1037
|
+
- Time on property: ${dwellTime > 60 ? Math.round(dwellTime / 60) + ' minutes' : dwellTime + ' seconds'}
|
|
1038
|
+
- Nearby landmarks: ${landmarkNames}
|
|
1039
|
+
${journeyContext ? `- Journey: ${journeyContext}` : ''}
|
|
1040
|
+
|
|
1041
|
+
INSTRUCTIONS:
|
|
1042
|
+
Look at the image and generate a single, natural sentence describing:
|
|
1043
|
+
1. Physical description (if person: gender, clothing, items carried; if vehicle: color, type)
|
|
1044
|
+
2. What they did (if determinable from context)
|
|
1045
|
+
3. Direction they're leaving towards
|
|
1046
|
+
|
|
1047
|
+
Examples of good descriptions:
|
|
1048
|
+
- "Man in black hoodie leaving after checking the mailbox"
|
|
1049
|
+
- "Woman in business attire heading to car in driveway"
|
|
1050
|
+
- "Red sedan backing out of the driveway"
|
|
1051
|
+
- "Delivery driver returning to FedEx truck after leaving package"
|
|
1052
|
+
- "Landscaper with leaf blower heading to work truck"
|
|
1053
|
+
|
|
1054
|
+
Generate ONLY the description, nothing else:`;
|
|
1055
|
+
|
|
1056
|
+
// Build message content - use multimodal format if we have an image
|
|
1057
|
+
let messageContent: any;
|
|
1058
|
+
if (imageData) {
|
|
1059
|
+
messageContent = [
|
|
1060
|
+
{ type: 'text', text: prompt },
|
|
1061
|
+
buildImageContent(imageData, this.llmProviderType),
|
|
1062
|
+
];
|
|
1063
|
+
} else {
|
|
1064
|
+
messageContent = prompt;
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
// Call LLM using ChatCompletion interface
|
|
1068
|
+
const result = await llm.getChatCompletion({
|
|
1069
|
+
messages: [
|
|
1070
|
+
{
|
|
1071
|
+
role: 'user',
|
|
1072
|
+
content: messageContent,
|
|
1073
|
+
},
|
|
1074
|
+
],
|
|
1075
|
+
max_tokens: 100,
|
|
1076
|
+
temperature: 0.7,
|
|
1077
|
+
});
|
|
1078
|
+
|
|
1079
|
+
const content = result?.choices?.[0]?.message?.content;
|
|
1080
|
+
if (content && typeof content === 'string') {
|
|
1081
|
+
return content.trim();
|
|
1082
|
+
}
|
|
1083
|
+
|
|
1084
|
+
return null;
|
|
1085
|
+
} catch (e) {
|
|
1086
|
+
this.console.warn(`LLM ${eventType} description generation failed:`, e);
|
|
1087
|
+
return null;
|
|
1088
|
+
}
|
|
1089
|
+
}
|
|
1090
|
+
|
|
955
1091
|
/** Build LLM prompt with RAG context */
|
|
956
1092
|
private buildLlmPrompt(
|
|
957
1093
|
tracked: TrackedObject,
|
|
@@ -536,10 +536,24 @@ export class TrackingEngine {
|
|
|
536
536
|
// Check if we've already alerted for this object
|
|
537
537
|
if (this.isInAlertCooldown(globalId)) return;
|
|
538
538
|
|
|
539
|
-
//
|
|
540
|
-
|
|
539
|
+
// Get snapshot for LLM description (if LLM is enabled)
|
|
540
|
+
let mediaObject: MediaObject | undefined;
|
|
541
|
+
if (this.config.useLlmDescriptions) {
|
|
542
|
+
try {
|
|
543
|
+
const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
|
|
544
|
+
if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
|
|
545
|
+
mediaObject = await camera.takePicture();
|
|
546
|
+
}
|
|
547
|
+
} catch (e) {
|
|
548
|
+
this.console.warn('Failed to get snapshot for entry description:', e);
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
// Generate spatial description (now async with LLM support)
|
|
553
|
+
const spatialResult = await this.spatialReasoning.generateEntryDescription(
|
|
541
554
|
tracked,
|
|
542
|
-
sighting.cameraId
|
|
555
|
+
sighting.cameraId,
|
|
556
|
+
mediaObject
|
|
543
557
|
);
|
|
544
558
|
|
|
545
559
|
if (isEntryPoint) {
|
|
@@ -626,10 +640,24 @@ export class TrackingEngine {
|
|
|
626
640
|
if (current && current.state === 'pending') {
|
|
627
641
|
this.state.markExited(tracked.globalId, sighting.cameraId, sighting.cameraName);
|
|
628
642
|
|
|
629
|
-
//
|
|
630
|
-
|
|
643
|
+
// Get snapshot for LLM description (if LLM is enabled)
|
|
644
|
+
let mediaObject: MediaObject | undefined;
|
|
645
|
+
if (this.config.useLlmDescriptions) {
|
|
646
|
+
try {
|
|
647
|
+
const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
|
|
648
|
+
if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
|
|
649
|
+
mediaObject = await camera.takePicture();
|
|
650
|
+
}
|
|
651
|
+
} catch (e) {
|
|
652
|
+
this.console.warn('Failed to get snapshot for exit description:', e);
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
// Generate rich exit description using topology context (now async with LLM support)
|
|
657
|
+
const spatialResult = await this.spatialReasoning.generateExitDescription(
|
|
631
658
|
current,
|
|
632
|
-
sighting.cameraId
|
|
659
|
+
sighting.cameraId,
|
|
660
|
+
mediaObject
|
|
633
661
|
);
|
|
634
662
|
|
|
635
663
|
this.console.log(
|
package/src/main.ts
CHANGED
|
@@ -1584,7 +1584,7 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
|
|
|
1584
1584
|
}
|
|
1585
1585
|
}
|
|
1586
1586
|
|
|
1587
|
-
private handleTrainingEndRequest(response: HttpResponse): void {
|
|
1587
|
+
private async handleTrainingEndRequest(response: HttpResponse): Promise<void> {
|
|
1588
1588
|
if (!this.trackingEngine) {
|
|
1589
1589
|
response.send(JSON.stringify({ error: 'Tracking engine not running' }), {
|
|
1590
1590
|
code: 500,
|
|
@@ -1595,6 +1595,44 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
|
|
|
1595
1595
|
|
|
1596
1596
|
const session = this.trackingEngine.endTrainingSession();
|
|
1597
1597
|
if (session) {
|
|
1598
|
+
// Get unique visited cameras
|
|
1599
|
+
const visitedCameraIds = [...new Set(session.visits.map(v => v.cameraId))];
|
|
1600
|
+
|
|
1601
|
+
// Auto-run discovery on visited cameras to detect landmarks and zones
|
|
1602
|
+
if (this.discoveryEngine && visitedCameraIds.length > 0) {
|
|
1603
|
+
this.console.log(`[Training] Running discovery analysis on ${visitedCameraIds.length} visited cameras...`);
|
|
1604
|
+
|
|
1605
|
+
let landmarksFound = 0;
|
|
1606
|
+
let zonesFound = 0;
|
|
1607
|
+
|
|
1608
|
+
for (const cameraId of visitedCameraIds) {
|
|
1609
|
+
try {
|
|
1610
|
+
const analysis = await this.discoveryEngine.analyzeScene(cameraId);
|
|
1611
|
+
if (analysis.isValid) {
|
|
1612
|
+
landmarksFound += analysis.landmarks.length;
|
|
1613
|
+
zonesFound += analysis.zones.length;
|
|
1614
|
+
this.console.log(`[Training] ${cameraId}: Found ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones`);
|
|
1615
|
+
}
|
|
1616
|
+
} catch (e) {
|
|
1617
|
+
this.console.warn(`[Training] Failed to analyze ${cameraId}:`, e);
|
|
1618
|
+
}
|
|
1619
|
+
}
|
|
1620
|
+
|
|
1621
|
+
// Get all pending suggestions and auto-accept them
|
|
1622
|
+
const suggestions = this.discoveryEngine.getPendingSuggestions();
|
|
1623
|
+
for (const suggestion of suggestions) {
|
|
1624
|
+
this.applyDiscoverySuggestion(suggestion);
|
|
1625
|
+
this.discoveryEngine.acceptSuggestion(suggestion.id);
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
// Persist topology after applying suggestions
|
|
1629
|
+
if (suggestions.length > 0 && this.trackingEngine) {
|
|
1630
|
+
const updatedTopology = this.trackingEngine.getTopology();
|
|
1631
|
+
await this.storageSettings.putSetting('topology', JSON.stringify(updatedTopology));
|
|
1632
|
+
this.console.log(`[Training] Auto-applied ${suggestions.length} discoveries (${landmarksFound} landmarks, ${zonesFound} zones)`);
|
|
1633
|
+
}
|
|
1634
|
+
}
|
|
1635
|
+
|
|
1598
1636
|
response.send(JSON.stringify(session), {
|
|
1599
1637
|
headers: { 'Content-Type': 'application/json' },
|
|
1600
1638
|
});
|