@blueharford/scrypted-spatial-awareness 0.6.8 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +83 -24
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +73 -21
- package/src/core/tracking-engine.ts +12 -3
package/out/plugin.zip
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
package/src/core/spatial-reasoning.ts
CHANGED
|
@@ -589,11 +589,14 @@ export class SpatialReasoningEngine {
|
|
|
589
589
|
const basicDescription = `${objectType} arrived at ${location}${source}`;
|
|
590
590
|
|
|
591
591
|
// Try LLM for enhanced description with visual details
|
|
592
|
+
this.console.log(`[Entry] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
|
|
592
593
|
if (this.config.enableLlm && mediaObject) {
|
|
594
|
+
this.console.log(`[Entry] Attempting LLM description for entry event`);
|
|
593
595
|
const llmDescription = await this.getLlmEntryExitDescription(
|
|
594
596
|
tracked, camera, landmarks, 'entry', mediaObject
|
|
595
597
|
);
|
|
596
598
|
if (llmDescription) {
|
|
599
|
+
this.console.log(`[Entry] LLM returned: ${llmDescription.substring(0, 50)}...`);
|
|
597
600
|
return {
|
|
598
601
|
description: llmDescription,
|
|
599
602
|
involvedLandmarks: landmarks,
|
|
@@ -601,6 +604,9 @@ export class SpatialReasoningEngine {
|
|
|
601
604
|
usedLlm: true,
|
|
602
605
|
};
|
|
603
606
|
}
|
|
607
|
+
this.console.warn(`[Entry] LLM returned null, falling back to basic`);
|
|
608
|
+
} else {
|
|
609
|
+
this.console.log(`[Entry] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
|
|
604
610
|
}
|
|
605
611
|
|
|
606
612
|
return {
|
|
@@ -695,11 +701,14 @@ export class SpatialReasoningEngine {
|
|
|
695
701
|
const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
|
|
696
702
|
|
|
697
703
|
// Try LLM for enhanced description with visual details
|
|
704
|
+
this.console.log(`[Exit] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
|
|
698
705
|
if (this.config.enableLlm && mediaObject) {
|
|
706
|
+
this.console.log(`[Exit] Attempting LLM description for exit event`);
|
|
699
707
|
const llmDescription = await this.getLlmEntryExitDescription(
|
|
700
708
|
tracked, camera, landmarks, 'exit', mediaObject, journeyContext
|
|
701
709
|
);
|
|
702
710
|
if (llmDescription) {
|
|
711
|
+
this.console.log(`[Exit] LLM returned: ${llmDescription.substring(0, 50)}...`);
|
|
703
712
|
return {
|
|
704
713
|
description: llmDescription,
|
|
705
714
|
involvedLandmarks: landmarks,
|
|
@@ -707,6 +716,9 @@ export class SpatialReasoningEngine {
|
|
|
707
716
|
usedLlm: true,
|
|
708
717
|
};
|
|
709
718
|
}
|
|
719
|
+
this.console.warn(`[Exit] LLM returned null, falling back to basic`);
|
|
720
|
+
} else {
|
|
721
|
+
this.console.log(`[Exit] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
|
|
710
722
|
}
|
|
711
723
|
|
|
712
724
|
return {
|
|
@@ -996,12 +1008,24 @@ export class SpatialReasoningEngine {
|
|
|
996
1008
|
mediaObject: MediaObject,
|
|
997
1009
|
journeyContext?: string
|
|
998
1010
|
): Promise<string | null> {
|
|
1011
|
+
this.console.log(`[LLM] getLlmEntryExitDescription called for ${eventType} event`);
|
|
1012
|
+
|
|
999
1013
|
const llm = await this.findLlmDevice();
|
|
1000
|
-
if (!llm
|
|
1014
|
+
if (!llm) {
|
|
1015
|
+
this.console.warn(`[LLM] No LLM device found for ${eventType} description`);
|
|
1016
|
+
return null;
|
|
1017
|
+
}
|
|
1018
|
+
if (!llm.getChatCompletion) {
|
|
1019
|
+
this.console.warn(`[LLM] LLM device has no getChatCompletion method`);
|
|
1020
|
+
return null;
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
this.console.log(`[LLM] Using LLM device: ${this.llmProvider}`);
|
|
1001
1024
|
|
|
1002
1025
|
try {
|
|
1003
1026
|
// Convert image to base64 for vision LLM
|
|
1004
1027
|
const imageData = await mediaObjectToBase64(mediaObject);
|
|
1028
|
+
this.console.log(`[LLM] Image converted: ${imageData ? 'success' : 'failed'}, type: ${imageData?.mediaType}`);
|
|
1005
1029
|
|
|
1006
1030
|
const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
|
|
1007
1031
|
const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
|
|
@@ -1053,37 +1077,65 @@ Examples of good descriptions:
|
|
|
1053
1077
|
|
|
1054
1078
|
Generate ONLY the description, nothing else:`;
|
|
1055
1079
|
|
|
1056
|
-
//
|
|
1057
|
-
let
|
|
1080
|
+
// Try multimodal format first, fall back to text-only if it fails
|
|
1081
|
+
let result: any;
|
|
1082
|
+
let usedVision = false;
|
|
1083
|
+
|
|
1058
1084
|
if (imageData) {
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1085
|
+
// First attempt: Try multimodal with image
|
|
1086
|
+
try {
|
|
1087
|
+
this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
|
|
1088
|
+
const multimodalContent = [
|
|
1089
|
+
{ type: 'text', text: prompt },
|
|
1090
|
+
buildImageContent(imageData, this.llmProviderType),
|
|
1091
|
+
];
|
|
1092
|
+
|
|
1093
|
+
result = await llm.getChatCompletion({
|
|
1094
|
+
messages: [
|
|
1095
|
+
{
|
|
1096
|
+
role: 'user',
|
|
1097
|
+
content: multimodalContent,
|
|
1098
|
+
},
|
|
1099
|
+
],
|
|
1100
|
+
max_tokens: 100,
|
|
1101
|
+
temperature: 0.7,
|
|
1102
|
+
});
|
|
1103
|
+
usedVision = true;
|
|
1104
|
+
} catch (visionError: any) {
|
|
1105
|
+
// If vision format fails, try text-only
|
|
1106
|
+
if (isVisionFormatError(visionError)) {
|
|
1107
|
+
this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
|
|
1108
|
+
} else {
|
|
1109
|
+
this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
|
|
1110
|
+
}
|
|
1111
|
+
}
|
|
1065
1112
|
}
|
|
1066
1113
|
|
|
1067
|
-
//
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1114
|
+
// If no result yet, try text-only
|
|
1115
|
+
if (!result) {
|
|
1116
|
+
this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
|
|
1117
|
+
result = await llm.getChatCompletion({
|
|
1118
|
+
messages: [
|
|
1119
|
+
{
|
|
1120
|
+
role: 'user',
|
|
1121
|
+
content: prompt,
|
|
1122
|
+
},
|
|
1123
|
+
],
|
|
1124
|
+
max_tokens: 100,
|
|
1125
|
+
temperature: 0.7,
|
|
1126
|
+
});
|
|
1127
|
+
}
|
|
1078
1128
|
|
|
1079
1129
|
const content = result?.choices?.[0]?.message?.content;
|
|
1080
1130
|
if (content && typeof content === 'string') {
|
|
1131
|
+
this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
|
|
1081
1132
|
return content.trim();
|
|
1082
1133
|
}
|
|
1083
1134
|
|
|
1135
|
+
this.console.warn(`[LLM] No content in response for ${eventType}`);
|
|
1084
1136
|
return null;
|
|
1085
1137
|
} catch (e) {
|
|
1086
|
-
this.console.warn(`LLM ${eventType} description generation failed:`, e);
|
|
1138
|
+
this.console.warn(`[LLM] ${eventType} description generation failed:`, e);
|
|
1087
1139
|
return null;
|
|
1088
1140
|
}
|
|
1089
1141
|
}
|
|
package/src/core/tracking-engine.ts
CHANGED
|
@@ -538,23 +538,28 @@ export class TrackingEngine {
|
|
|
538
538
|
|
|
539
539
|
// Get snapshot for LLM description (if LLM is enabled)
|
|
540
540
|
let mediaObject: MediaObject | undefined;
|
|
541
|
+
this.console.log(`[Entry Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
|
|
541
542
|
if (this.config.useLlmDescriptions) {
|
|
542
543
|
try {
|
|
543
544
|
const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
|
|
545
|
+
this.console.log(`[Entry Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(ScryptedInterface.Camera)}`);
|
|
544
546
|
if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
|
|
545
547
|
mediaObject = await camera.takePicture();
|
|
548
|
+
this.console.log(`[Entry Alert] Got snapshot: ${!!mediaObject}`);
|
|
546
549
|
}
|
|
547
550
|
} catch (e) {
|
|
548
|
-
this.console.warn('Failed to get snapshot
|
|
551
|
+
this.console.warn('[Entry Alert] Failed to get snapshot:', e);
|
|
549
552
|
}
|
|
550
553
|
}
|
|
551
554
|
|
|
552
555
|
// Generate spatial description (now async with LLM support)
|
|
556
|
+
this.console.log(`[Entry Alert] Calling generateEntryDescription with mediaObject=${!!mediaObject}`);
|
|
553
557
|
const spatialResult = await this.spatialReasoning.generateEntryDescription(
|
|
554
558
|
tracked,
|
|
555
559
|
sighting.cameraId,
|
|
556
560
|
mediaObject
|
|
557
561
|
);
|
|
562
|
+
this.console.log(`[Entry Alert] Got description: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
|
|
558
563
|
|
|
559
564
|
if (isEntryPoint) {
|
|
560
565
|
// Entry point - generate property entry alert
|
|
@@ -642,18 +647,22 @@ export class TrackingEngine {
|
|
|
642
647
|
|
|
643
648
|
// Get snapshot for LLM description (if LLM is enabled)
|
|
644
649
|
let mediaObject: MediaObject | undefined;
|
|
650
|
+
this.console.log(`[Exit Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
|
|
645
651
|
if (this.config.useLlmDescriptions) {
|
|
646
652
|
try {
|
|
647
653
|
const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
|
|
654
|
+
this.console.log(`[Exit Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(ScryptedInterface.Camera)}`);
|
|
648
655
|
if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
|
|
649
656
|
mediaObject = await camera.takePicture();
|
|
657
|
+
this.console.log(`[Exit Alert] Got snapshot: ${!!mediaObject}`);
|
|
650
658
|
}
|
|
651
659
|
} catch (e) {
|
|
652
|
-
this.console.warn('Failed to get snapshot
|
|
660
|
+
this.console.warn('[Exit Alert] Failed to get snapshot:', e);
|
|
653
661
|
}
|
|
654
662
|
}
|
|
655
663
|
|
|
656
664
|
// Generate rich exit description using topology context (now async with LLM support)
|
|
665
|
+
this.console.log(`[Exit Alert] Calling generateExitDescription with mediaObject=${!!mediaObject}`);
|
|
657
666
|
const spatialResult = await this.spatialReasoning.generateExitDescription(
|
|
658
667
|
current,
|
|
659
668
|
sighting.cameraId,
|
|
@@ -661,7 +670,7 @@ export class TrackingEngine {
|
|
|
661
670
|
);
|
|
662
671
|
|
|
663
672
|
this.console.log(
|
|
664
|
-
`Object ${tracked.globalId.slice(0, 8)} exited: ${spatialResult.description}`
|
|
673
|
+
`[Exit Alert] Object ${tracked.globalId.slice(0, 8)} exited: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`
|
|
665
674
|
);
|
|
666
675
|
|
|
667
676
|
await this.alertManager.checkAndAlert('property_exit', current, {
|