@blueharford/scrypted-spatial-awareness 0.6.8 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blueharford/scrypted-spatial-awareness",
3
- "version": "0.6.8",
3
+ "version": "0.6.10",
4
4
  "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
5
5
  "author": "Joshua Seidel <blueharford>",
6
6
  "license": "Apache-2.0",
@@ -589,11 +589,14 @@ export class SpatialReasoningEngine {
589
589
  const basicDescription = `${objectType} arrived at ${location}${source}`;
590
590
 
591
591
  // Try LLM for enhanced description with visual details
592
+ this.console.log(`[Entry] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
592
593
  if (this.config.enableLlm && mediaObject) {
594
+ this.console.log(`[Entry] Attempting LLM description for entry event`);
593
595
  const llmDescription = await this.getLlmEntryExitDescription(
594
596
  tracked, camera, landmarks, 'entry', mediaObject
595
597
  );
596
598
  if (llmDescription) {
599
+ this.console.log(`[Entry] LLM returned: ${llmDescription.substring(0, 50)}...`);
597
600
  return {
598
601
  description: llmDescription,
599
602
  involvedLandmarks: landmarks,
@@ -601,6 +604,9 @@ export class SpatialReasoningEngine {
601
604
  usedLlm: true,
602
605
  };
603
606
  }
607
+ this.console.warn(`[Entry] LLM returned null, falling back to basic`);
608
+ } else {
609
+ this.console.log(`[Entry] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
604
610
  }
605
611
 
606
612
  return {
@@ -695,11 +701,14 @@ export class SpatialReasoningEngine {
695
701
  const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
696
702
 
697
703
  // Try LLM for enhanced description with visual details
704
+ this.console.log(`[Exit] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
698
705
  if (this.config.enableLlm && mediaObject) {
706
+ this.console.log(`[Exit] Attempting LLM description for exit event`);
699
707
  const llmDescription = await this.getLlmEntryExitDescription(
700
708
  tracked, camera, landmarks, 'exit', mediaObject, journeyContext
701
709
  );
702
710
  if (llmDescription) {
711
+ this.console.log(`[Exit] LLM returned: ${llmDescription.substring(0, 50)}...`);
703
712
  return {
704
713
  description: llmDescription,
705
714
  involvedLandmarks: landmarks,
@@ -707,6 +716,9 @@ export class SpatialReasoningEngine {
707
716
  usedLlm: true,
708
717
  };
709
718
  }
719
+ this.console.warn(`[Exit] LLM returned null, falling back to basic`);
720
+ } else {
721
+ this.console.log(`[Exit] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
710
722
  }
711
723
 
712
724
  return {
@@ -996,12 +1008,24 @@ export class SpatialReasoningEngine {
996
1008
  mediaObject: MediaObject,
997
1009
  journeyContext?: string
998
1010
  ): Promise<string | null> {
1011
+ this.console.log(`[LLM] getLlmEntryExitDescription called for ${eventType} event`);
1012
+
999
1013
  const llm = await this.findLlmDevice();
1000
- if (!llm || !llm.getChatCompletion) return null;
1014
+ if (!llm) {
1015
+ this.console.warn(`[LLM] No LLM device found for ${eventType} description`);
1016
+ return null;
1017
+ }
1018
+ if (!llm.getChatCompletion) {
1019
+ this.console.warn(`[LLM] LLM device has no getChatCompletion method`);
1020
+ return null;
1021
+ }
1022
+
1023
+ this.console.log(`[LLM] Using LLM device: ${this.llmProvider}`);
1001
1024
 
1002
1025
  try {
1003
1026
  // Convert image to base64 for vision LLM
1004
1027
  const imageData = await mediaObjectToBase64(mediaObject);
1028
+ this.console.log(`[LLM] Image converted: ${imageData ? 'success' : 'failed'}, type: ${imageData?.mediaType}`);
1005
1029
 
1006
1030
  const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
1007
1031
  const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
@@ -1053,37 +1077,65 @@ Examples of good descriptions:
1053
1077
 
1054
1078
  Generate ONLY the description, nothing else:`;
1055
1079
 
1056
- // Build message content - use multimodal format if we have an image
1057
- let messageContent: any;
1080
+ // Try multimodal format first, fall back to text-only if it fails
1081
+ let result: any;
1082
+ let usedVision = false;
1083
+
1058
1084
  if (imageData) {
1059
- messageContent = [
1060
- { type: 'text', text: prompt },
1061
- buildImageContent(imageData, this.llmProviderType),
1062
- ];
1063
- } else {
1064
- messageContent = prompt;
1085
+ // First attempt: Try multimodal with image
1086
+ try {
1087
+ this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
1088
+ const multimodalContent = [
1089
+ { type: 'text', text: prompt },
1090
+ buildImageContent(imageData, this.llmProviderType),
1091
+ ];
1092
+
1093
+ result = await llm.getChatCompletion({
1094
+ messages: [
1095
+ {
1096
+ role: 'user',
1097
+ content: multimodalContent,
1098
+ },
1099
+ ],
1100
+ max_tokens: 100,
1101
+ temperature: 0.7,
1102
+ });
1103
+ usedVision = true;
1104
+ } catch (visionError: any) {
1105
+ // If vision format fails, try text-only
1106
+ if (isVisionFormatError(visionError)) {
1107
+ this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
1108
+ } else {
1109
+ this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
1110
+ }
1111
+ }
1065
1112
  }
1066
1113
 
1067
- // Call LLM using ChatCompletion interface
1068
- const result = await llm.getChatCompletion({
1069
- messages: [
1070
- {
1071
- role: 'user',
1072
- content: messageContent,
1073
- },
1074
- ],
1075
- max_tokens: 100,
1076
- temperature: 0.7,
1077
- });
1114
+ // If no result yet, try text-only
1115
+ if (!result) {
1116
+ this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
1117
+ result = await llm.getChatCompletion({
1118
+ messages: [
1119
+ {
1120
+ role: 'user',
1121
+ content: prompt,
1122
+ },
1123
+ ],
1124
+ max_tokens: 100,
1125
+ temperature: 0.7,
1126
+ });
1127
+ }
1078
1128
 
1079
1129
  const content = result?.choices?.[0]?.message?.content;
1080
1130
  if (content && typeof content === 'string') {
1131
+ this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
1081
1132
  return content.trim();
1082
1133
  }
1083
1134
 
1135
+ this.console.warn(`[LLM] No content in response for ${eventType}`);
1084
1136
  return null;
1085
1137
  } catch (e) {
1086
- this.console.warn(`LLM ${eventType} description generation failed:`, e);
1138
+ this.console.warn(`[LLM] ${eventType} description generation failed:`, e);
1087
1139
  return null;
1088
1140
  }
1089
1141
  }
@@ -538,23 +538,28 @@ export class TrackingEngine {
538
538
 
539
539
  // Get snapshot for LLM description (if LLM is enabled)
540
540
  let mediaObject: MediaObject | undefined;
541
+ this.console.log(`[Entry Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
541
542
  if (this.config.useLlmDescriptions) {
542
543
  try {
543
544
  const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
545
+ this.console.log(`[Entry Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(ScryptedInterface.Camera)}`);
544
546
  if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
545
547
  mediaObject = await camera.takePicture();
548
+ this.console.log(`[Entry Alert] Got snapshot: ${!!mediaObject}`);
546
549
  }
547
550
  } catch (e) {
548
- this.console.warn('Failed to get snapshot for entry description:', e);
551
+ this.console.warn('[Entry Alert] Failed to get snapshot:', e);
549
552
  }
550
553
  }
551
554
 
552
555
  // Generate spatial description (now async with LLM support)
556
+ this.console.log(`[Entry Alert] Calling generateEntryDescription with mediaObject=${!!mediaObject}`);
553
557
  const spatialResult = await this.spatialReasoning.generateEntryDescription(
554
558
  tracked,
555
559
  sighting.cameraId,
556
560
  mediaObject
557
561
  );
562
+ this.console.log(`[Entry Alert] Got description: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
558
563
 
559
564
  if (isEntryPoint) {
560
565
  // Entry point - generate property entry alert
@@ -642,18 +647,22 @@ export class TrackingEngine {
642
647
 
643
648
  // Get snapshot for LLM description (if LLM is enabled)
644
649
  let mediaObject: MediaObject | undefined;
650
+ this.console.log(`[Exit Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
645
651
  if (this.config.useLlmDescriptions) {
646
652
  try {
647
653
  const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
654
+ this.console.log(`[Exit Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(ScryptedInterface.Camera)}`);
648
655
  if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
649
656
  mediaObject = await camera.takePicture();
657
+ this.console.log(`[Exit Alert] Got snapshot: ${!!mediaObject}`);
650
658
  }
651
659
  } catch (e) {
652
- this.console.warn('Failed to get snapshot for exit description:', e);
660
+ this.console.warn('[Exit Alert] Failed to get snapshot:', e);
653
661
  }
654
662
  }
655
663
 
656
664
  // Generate rich exit description using topology context (now async with LLM support)
665
+ this.console.log(`[Exit Alert] Calling generateExitDescription with mediaObject=${!!mediaObject}`);
657
666
  const spatialResult = await this.spatialReasoning.generateExitDescription(
658
667
  current,
659
668
  sighting.cameraId,
@@ -661,7 +670,7 @@ export class TrackingEngine {
661
670
  );
662
671
 
663
672
  this.console.log(
664
- `Object ${tracked.globalId.slice(0, 8)} exited: ${spatialResult.description}`
673
+ `[Exit Alert] Object ${tracked.globalId.slice(0, 8)} exited: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`
665
674
  );
666
675
 
667
676
  await this.alertManager.checkAndAlert('property_exit', current, {