npm - @blueharford/scrypted-spatial-awareness - Versions diffs - 0.6.8 → 0.6.10 - Mend

@blueharford/scrypted-spatial-awareness 0.6.8 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/main.nodejs.js +1 -1
package/dist/main.nodejs.js.map +1 -1
package/dist/plugin.zip +0 -0
package/out/main.nodejs.js +83 -24
package/out/main.nodejs.js.map +1 -1
package/out/plugin.zip +0 -0
package/package.json +1 -1
package/src/core/spatial-reasoning.ts +73 -21
package/src/core/tracking-engine.ts +12 -3

package/dist/plugin.zip CHANGED Viewed

Binary file

package/out/main.nodejs.js CHANGED Viewed

@@ -35531,9 +35531,12 @@ class SpatialReasoningEngine {
         }
         const basicDescription = `${objectType} arrived at ${location}${source}`;
         // Try LLM for enhanced description with visual details
+        this.console.log(`[Entry] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
         if (this.config.enableLlm && mediaObject) {
+            this.console.log(`[Entry] Attempting LLM description for entry event`);
             const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'entry', mediaObject);
             if (llmDescription) {
+                this.console.log(`[Entry] LLM returned: ${llmDescription.substring(0, 50)}...`);
                 return {
                     description: llmDescription,
                     involvedLandmarks: landmarks,
@@ -35541,6 +35544,10 @@ class SpatialReasoningEngine {
                     usedLlm: true,
                 };
             }
+            this.console.warn(`[Entry] LLM returned null, falling back to basic`);
+        }
+        else {
+            this.console.log(`[Entry] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
         }
         return {
             description: basicDescription,
@@ -35617,9 +35624,12 @@ class SpatialReasoningEngine {
         }
         const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
         // Try LLM for enhanced description with visual details
+        this.console.log(`[Exit] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
         if (this.config.enableLlm && mediaObject) {
+            this.console.log(`[Exit] Attempting LLM description for exit event`);
             const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'exit', mediaObject, journeyContext);
             if (llmDescription) {
+                this.console.log(`[Exit] LLM returned: ${llmDescription.substring(0, 50)}...`);
                 return {
                     description: llmDescription,
                     involvedLandmarks: landmarks,
@@ -35627,6 +35637,10 @@ class SpatialReasoningEngine {
                     usedLlm: true,
                 };
             }
+            this.console.warn(`[Exit] LLM returned null, falling back to basic`);
+        }
+        else {
+            this.console.log(`[Exit] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
         }
         return {
             description: basicDescription,
@@ -35828,12 +35842,21 @@ class SpatialReasoningEngine {
     }
     /** Get LLM-enhanced description for entry/exit events */
     async getLlmEntryExitDescription(tracked, camera, landmarks, eventType, mediaObject, journeyContext) {
+        this.console.log(`[LLM] getLlmEntryExitDescription called for ${eventType} event`);
         const llm = await this.findLlmDevice();
-        if (!llm || !llm.getChatCompletion)
+        if (!llm) {
+            this.console.warn(`[LLM] No LLM device found for ${eventType} description`);
             return null;
+        }
+        if (!llm.getChatCompletion) {
+            this.console.warn(`[LLM] LLM device has no getChatCompletion method`);
+            return null;
+        }
+        this.console.log(`[LLM] Using LLM device: ${this.llmProvider}`);
         try {
             // Convert image to base64 for vision LLM
             const imageData = await mediaObjectToBase64(mediaObject);
+            this.console.log(`[LLM] Image converted: ${imageData ? 'success' : 'failed'}, type: ${imageData?.mediaType}`);
             const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
             const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
             // Build context-aware prompt
@@ -35882,36 +35905,63 @@ Examples of good descriptions:
 - "Landscaper with leaf blower heading to work truck"
 Generate ONLY the description, nothing else:`;
-            // Build message content - use multimodal format if we have an image
-            let messageContent;
+            // Try multimodal format first, fall back to text-only if it fails
+            let result;
+            let usedVision = false;
             if (imageData) {
-                messageContent = [
-                    { type: 'text', text: prompt },
-                    buildImageContent(imageData, this.llmProviderType),
-                ];
+                // First attempt: Try multimodal with image
+                try {
+                    this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
+                    const multimodalContent = [
+                        { type: 'text', text: prompt },
+                        buildImageContent(imageData, this.llmProviderType),
+                    ];
+                    result = await llm.getChatCompletion({
+                        messages: [
+                            {
+                                role: 'user',
+                                content: multimodalContent,
+                            },
+                        ],
+                        max_tokens: 100,
+                        temperature: 0.7,
+                    });
+                    usedVision = true;
+                }
+                catch (visionError) {
+                    // If vision format fails, try text-only
+                    if (isVisionFormatError(visionError)) {
+                        this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
+                    }
+                    else {
+                        this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
+                    }
+                }
             }
-            else {
-                messageContent = prompt;
+            // If no result yet, try text-only
+            if (!result) {
+                this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
+                result = await llm.getChatCompletion({
+                    messages: [
+                        {
+                            role: 'user',
+                            content: prompt,
+                        },
+                    ],
+                    max_tokens: 100,
+                    temperature: 0.7,
+                });
             }
-            // Call LLM using ChatCompletion interface
-            const result = await llm.getChatCompletion({
-                messages: [
-                    {
-                        role: 'user',
-                        content: messageContent,
-                    },
-                ],
-                max_tokens: 100,
-                temperature: 0.7,
-            });
             const content = result?.choices?.[0]?.message?.content;
             if (content && typeof content === 'string') {
+                this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
                 return content.trim();
             }
+            this.console.warn(`[LLM] No content in response for ${eventType}`);
             return null;
         }
         catch (e) {
-            this.console.warn(`LLM ${eventType} description generation failed:`, e);
+            this.console.warn(`[LLM] ${eventType} description generation failed:`, e);
             return null;
         }
     }
@@ -37222,19 +37272,24 @@ class TrackingEngine {
                 return;
             // Get snapshot for LLM description (if LLM is enabled)
             let mediaObject;
+            this.console.log(`[Entry Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
             if (this.config.useLlmDescriptions) {
                 try {
                     const camera = systemManager.getDeviceById(sighting.cameraId);
+                    this.console.log(`[Entry Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)}`);
                     if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
                         mediaObject = await camera.takePicture();
+                        this.console.log(`[Entry Alert] Got snapshot: ${!!mediaObject}`);
                     }
                 }
                 catch (e) {
-                    this.console.warn('Failed to get snapshot for entry description:', e);
+                    this.console.warn('[Entry Alert] Failed to get snapshot:', e);
                 }
             }
             // Generate spatial description (now async with LLM support)
+            this.console.log(`[Entry Alert] Calling generateEntryDescription with mediaObject=${!!mediaObject}`);
             const spatialResult = await this.spatialReasoning.generateEntryDescription(tracked, sighting.cameraId, mediaObject);
+            this.console.log(`[Entry Alert] Got description: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
             if (isEntryPoint) {
                 // Entry point - generate property entry alert
                 await this.alertManager.checkAndAlert('property_entry', tracked, {
@@ -37311,20 +37366,24 @@ class TrackingEngine {
                 this.state.markExited(tracked.globalId, sighting.cameraId, sighting.cameraName);
                 // Get snapshot for LLM description (if LLM is enabled)
                 let mediaObject;
+                this.console.log(`[Exit Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
                 if (this.config.useLlmDescriptions) {
                     try {
                         const camera = systemManager.getDeviceById(sighting.cameraId);
+                        this.console.log(`[Exit Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)}`);
                         if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
                             mediaObject = await camera.takePicture();
+                            this.console.log(`[Exit Alert] Got snapshot: ${!!mediaObject}`);
                         }
                     }
                     catch (e) {
-                        this.console.warn('Failed to get snapshot for exit description:', e);
+                        this.console.warn('[Exit Alert] Failed to get snapshot:', e);
                     }
                 }
                 // Generate rich exit description using topology context (now async with LLM support)
+                this.console.log(`[Exit Alert] Calling generateExitDescription with mediaObject=${!!mediaObject}`);
                 const spatialResult = await this.spatialReasoning.generateExitDescription(current, sighting.cameraId, mediaObject);
-                this.console.log(`Object ${tracked.globalId.slice(0, 8)} exited: ${spatialResult.description}`);
+                this.console.log(`[Exit Alert] Object ${tracked.globalId.slice(0, 8)} exited: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
                 await this.alertManager.checkAndAlert('property_exit', current, {
                     cameraId: sighting.cameraId,
                     cameraName: sighting.cameraName,