@blueharford/scrypted-spatial-awareness 0.6.6 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@blueharford/scrypted-spatial-awareness",
-  "version": "0.6.6",
+  "version": "0.6.8",
   "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
   "author": "Joshua Seidel <blueharford>",
   "license": "Apache-2.0",
@@ -545,10 +545,11 @@ export class SpatialReasoningEngine {
   }
 
   /** Generate entry description when object enters property */
-  generateEntryDescription(
+  async generateEntryDescription(
     tracked: TrackedObject,
-    cameraId: string
-  ): SpatialReasoningResult {
+    cameraId: string,
+    mediaObject?: MediaObject
+  ): Promise<SpatialReasoningResult> {
     if (!this.topology) {
       return {
         description: `${this.capitalizeFirst(tracked.className)} entered property`,
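Note that this hunk changes the public signature of generateEntryDescription: the method becomes async and gains an optional MediaObject parameter, so any external caller must now await it. A minimal before/after sketch (engine, tracked, cameraId, and mediaObject are placeholder names, not from this diff):

    // 0.6.6: synchronous, topology-only
    // const result = engine.generateEntryDescription(tracked, cameraId);

    // 0.6.8: async, with an optional snapshot for the vision-LLM path
    const result = await engine.generateEntryDescription(tracked, cameraId, mediaObject);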
@@ -571,11 +572,10 @@ export class SpatialReasoningEngine {
     const landmarks = getLandmarksVisibleFromCamera(this.topology, cameraId);
     const objectType = this.capitalizeFirst(tracked.className);
 
-    // Build entry description using topology context
+    // Build basic entry description using topology context
     const location = this.describeLocation(camera, landmarks, 'to');
 
     // Check if we can determine where they came from (e.g., street, neighbor)
-    const entryLandmark = landmarks.find(l => l.isEntryPoint);
     const streetLandmark = landmarks.find(l => l.type === 'street');
     const neighborLandmark = landmarks.find(l => l.type === 'neighbor');
 
@@ -586,8 +586,25 @@ export class SpatialReasoningEngine {
       source = ` from ${neighborLandmark.name}`;
     }
 
+    const basicDescription = `${objectType} arrived at ${location}${source}`;
+
+    // Try LLM for enhanced description with visual details
+    if (this.config.enableLlm && mediaObject) {
+      const llmDescription = await this.getLlmEntryExitDescription(
+        tracked, camera, landmarks, 'entry', mediaObject
+      );
+      if (llmDescription) {
+        return {
+          description: llmDescription,
+          involvedLandmarks: landmarks,
+          confidence: 0.9,
+          usedLlm: true,
+        };
+      }
+    }
+
     return {
-      description: `${objectType} arrived at ${location}${source}`,
+      description: basicDescription,
       involvedLandmarks: landmarks,
       confidence: 0.8,
       usedLlm: false,
@@ -595,10 +612,11 @@ export class SpatialReasoningEngine {
   }
 
   /** Generate exit description when object leaves property */
-  generateExitDescription(
+  async generateExitDescription(
     tracked: TrackedObject,
-    cameraId: string
-  ): SpatialReasoningResult {
+    cameraId: string,
+    mediaObject?: MediaObject
+  ): Promise<SpatialReasoningResult> {
     if (!this.topology) {
       return {
         description: `${this.capitalizeFirst(tracked.className)} left property`,
@@ -621,7 +639,7 @@ export class SpatialReasoningEngine {
     const landmarks = getLandmarksVisibleFromCamera(this.topology, cameraId);
     const objectType = this.capitalizeFirst(tracked.className);
 
-    // Build exit description
+    // Build basic exit description
     const location = this.describeLocation(camera, landmarks, 'from');
 
     // Check for exit point landmarks
@@ -674,8 +692,25 @@ export class SpatialReasoningEngine {
       }
     }
 
+    const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
+
+    // Try LLM for enhanced description with visual details
+    if (this.config.enableLlm && mediaObject) {
+      const llmDescription = await this.getLlmEntryExitDescription(
+        tracked, camera, landmarks, 'exit', mediaObject, journeyContext
+      );
+      if (llmDescription) {
+        return {
+          description: llmDescription,
+          involvedLandmarks: landmarks,
+          confidence: 0.9,
+          usedLlm: true,
+        };
+      }
+    }
+
     return {
-      description: `${objectType} left ${location}${destination}${timeContext}${journeyContext}`,
+      description: basicDescription,
       involvedLandmarks: landmarks,
       confidence: 0.8,
       usedLlm: false,
@@ -952,6 +987,107 @@ export class SpatialReasoningEngine {
     }
   }
 
+  /** Get LLM-enhanced description for entry/exit events */
+  private async getLlmEntryExitDescription(
+    tracked: TrackedObject,
+    camera: CameraNode,
+    landmarks: Landmark[],
+    eventType: 'entry' | 'exit',
+    mediaObject: MediaObject,
+    journeyContext?: string
+  ): Promise<string | null> {
+    const llm = await this.findLlmDevice();
+    if (!llm || !llm.getChatCompletion) return null;
+
+    try {
+      // Convert image to base64 for vision LLM
+      const imageData = await mediaObjectToBase64(mediaObject);
+
+      const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
+      const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
+
+      // Build context-aware prompt
+      const prompt = eventType === 'entry'
+        ? `You are a security camera system. Analyze this image and describe who/what just arrived.
+
+CONTEXT:
+- Camera: ${camera.name}
+- Object type: ${tracked.className}
+- Nearby landmarks: ${landmarkNames}
+
+INSTRUCTIONS:
+Look at the image and generate a single, natural sentence describing:
+1. Physical description (if person: gender, clothing, items carried; if vehicle: color, type, make)
+2. What they appear to be doing (arriving, approaching, etc.)
+3. Relevant landmark context (driveway, front door, mailbox, etc.)
+
+Examples of good descriptions:
+- "Man in gray hoodie approaching the front door"
+- "Woman in scrubs arriving with shopping bags"
+- "White delivery van pulling into the driveway"
+- "UPS driver carrying package towards the porch"
+- "Teenager on bicycle coming up the driveway"
+
+Generate ONLY the description, nothing else:`
+        : `You are a security camera system. Analyze this image and describe who/what is leaving.
+
+CONTEXT:
+- Camera: ${camera.name}
+- Object type: ${tracked.className}
+- Time on property: ${dwellTime > 60 ? Math.round(dwellTime / 60) + ' minutes' : dwellTime + ' seconds'}
+- Nearby landmarks: ${landmarkNames}
+${journeyContext ? `- Journey: ${journeyContext}` : ''}
+
+INSTRUCTIONS:
+Look at the image and generate a single, natural sentence describing:
+1. Physical description (if person: gender, clothing, items carried; if vehicle: color, type)
+2. What they did (if determinable from context)
+3. Direction they're leaving towards
+
+Examples of good descriptions:
+- "Man in black hoodie leaving after checking the mailbox"
+- "Woman in business attire heading to car in driveway"
+- "Red sedan backing out of the driveway"
+- "Delivery driver returning to FedEx truck after leaving package"
+- "Landscaper with leaf blower heading to work truck"
+
+Generate ONLY the description, nothing else:`;
+
+      // Build message content - use multimodal format if we have an image
+      let messageContent: any;
+      if (imageData) {
+        messageContent = [
+          { type: 'text', text: prompt },
+          buildImageContent(imageData, this.llmProviderType),
+        ];
+      } else {
+        messageContent = prompt;
+      }
+
+      // Call LLM using ChatCompletion interface
+      const result = await llm.getChatCompletion({
+        messages: [
+          {
+            role: 'user',
+            content: messageContent,
+          },
+        ],
+        max_tokens: 100,
+        temperature: 0.7,
+      });
+
+      const content = result?.choices?.[0]?.message?.content;
+      if (content && typeof content === 'string') {
+        return content.trim();
+      }
+
+      return null;
+    } catch (e) {
+      this.console.warn(`LLM ${eventType} description generation failed:`, e);
+      return null;
+    }
+  }
+
   /** Build LLM prompt with RAG context */
   private buildLlmPrompt(
     tracked: TrackedObject,
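The new method leans on two helpers that are referenced but not defined anywhere in this diff: mediaObjectToBase64 and buildImageContent. A plausible sketch of both, assuming Scrypted's mediaManager buffer-conversion API and an OpenAI-style image_url content part; the package's actual implementations may differ:

    import sdk, { MediaObject } from '@scrypted/sdk';

    // Sketch: convert a Scrypted MediaObject to a base64 JPEG string.
    async function mediaObjectToBase64(mo: MediaObject): Promise<string | undefined> {
      try {
        const buffer = await sdk.mediaManager.convertMediaObjectToBuffer(mo, 'image/jpeg');
        return buffer.toString('base64');
      } catch {
        return undefined;
      }
    }

    // Sketch: build an image content part for a multimodal chat message.
    // OpenAI-compatible providers accept an image_url part with a data URI;
    // the providerType parameter is a guess at why the diff passes this.llmProviderType.
    function buildImageContent(base64: string, providerType?: string) {
      return {
        type: 'image_url',
        image_url: { url: `data:image/jpeg;base64,${base64}` },
      };
    }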
@@ -536,10 +536,24 @@ export class TrackingEngine {
     // Check if we've already alerted for this object
     if (this.isInAlertCooldown(globalId)) return;
 
-    // Generate spatial description
-    const spatialResult = this.spatialReasoning.generateEntryDescription(
+    // Get snapshot for LLM description (if LLM is enabled)
+    let mediaObject: MediaObject | undefined;
+    if (this.config.useLlmDescriptions) {
+      try {
+        const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
+        if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
+          mediaObject = await camera.takePicture();
+        }
+      } catch (e) {
+        this.console.warn('Failed to get snapshot for entry description:', e);
+      }
+    }
+
+    // Generate spatial description (now async with LLM support)
+    const spatialResult = await this.spatialReasoning.generateEntryDescription(
       tracked,
-      sighting.cameraId
+      sighting.cameraId,
+      mediaObject
     );
 
     if (isEntryPoint) {
@@ -626,10 +640,24 @@ export class TrackingEngine {
     if (current && current.state === 'pending') {
       this.state.markExited(tracked.globalId, sighting.cameraId, sighting.cameraName);
 
-      // Generate rich exit description using topology context
-      const spatialResult = this.spatialReasoning.generateExitDescription(
+      // Get snapshot for LLM description (if LLM is enabled)
+      let mediaObject: MediaObject | undefined;
+      if (this.config.useLlmDescriptions) {
+        try {
+          const camera = systemManager.getDeviceById<Camera>(sighting.cameraId);
+          if (camera?.interfaces?.includes(ScryptedInterface.Camera)) {
+            mediaObject = await camera.takePicture();
+          }
+        } catch (e) {
+          this.console.warn('Failed to get snapshot for exit description:', e);
+        }
+      }
+
+      // Generate rich exit description using topology context (now async with LLM support)
+      const spatialResult = await this.spatialReasoning.generateExitDescription(
         current,
-        sighting.cameraId
+        sighting.cameraId,
+        mediaObject
       );
 
       this.console.log(
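Both call sites now await the reasoning methods, so everything downstream of spatialResult is unchanged. For illustration, a consumer can tell the two paths apart via the fields set in the hunks above (usedLlm: true at confidence 0.9 for vision-enhanced descriptions, usedLlm: false at 0.8 for the topology-only fallback); this log line is a sketch, not from the package:

    // Sketch: distinguish vision-enhanced descriptions from topology-only fallbacks.
    this.console.log(
      `[${spatialResult.usedLlm ? 'LLM' : 'basic'}] ${spatialResult.description}` +
      ` (confidence ${spatialResult.confidence})`
    );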
package/src/main.ts CHANGED
@@ -1584,7 +1584,7 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
     }
   }
 
-  private handleTrainingEndRequest(response: HttpResponse): void {
+  private async handleTrainingEndRequest(response: HttpResponse): Promise<void> {
     if (!this.trackingEngine) {
       response.send(JSON.stringify({ error: 'Tracking engine not running' }), {
         code: 500,
@@ -1595,6 +1595,44 @@ export class SpatialAwarenessPlugin extends ScryptedDeviceBase
 
     const session = this.trackingEngine.endTrainingSession();
     if (session) {
+      // Get unique visited cameras
+      const visitedCameraIds = [...new Set(session.visits.map(v => v.cameraId))];
+
+      // Auto-run discovery on visited cameras to detect landmarks and zones
+      if (this.discoveryEngine && visitedCameraIds.length > 0) {
+        this.console.log(`[Training] Running discovery analysis on ${visitedCameraIds.length} visited cameras...`);
+
+        let landmarksFound = 0;
+        let zonesFound = 0;
+
+        for (const cameraId of visitedCameraIds) {
+          try {
+            const analysis = await this.discoveryEngine.analyzeScene(cameraId);
+            if (analysis.isValid) {
+              landmarksFound += analysis.landmarks.length;
+              zonesFound += analysis.zones.length;
+              this.console.log(`[Training] ${cameraId}: Found ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones`);
+            }
+          } catch (e) {
+            this.console.warn(`[Training] Failed to analyze ${cameraId}:`, e);
+          }
+        }
+
+        // Get all pending suggestions and auto-accept them
+        const suggestions = this.discoveryEngine.getPendingSuggestions();
+        for (const suggestion of suggestions) {
+          this.applyDiscoverySuggestion(suggestion);
+          this.discoveryEngine.acceptSuggestion(suggestion.id);
+        }
+
+        // Persist topology after applying suggestions
+        if (suggestions.length > 0 && this.trackingEngine) {
+          const updatedTopology = this.trackingEngine.getTopology();
+          await this.storageSettings.putSetting('topology', JSON.stringify(updatedTopology));
+          this.console.log(`[Training] Auto-applied ${suggestions.length} discoveries (${landmarksFound} landmarks, ${zonesFound} zones)`);
+        }
+      }
+
       response.send(JSON.stringify(session), {
         headers: { 'Content-Type': 'application/json' },
       });
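The handler persists the updated topology under the 'topology' storage key after auto-accepting discoveries. A sketch of the matching load path on plugin startup, assuming the same key and the storage-settings values accessor; setTopology is a hypothetical setter, not shown in this diff:

    // Sketch: restore the topology persisted by handleTrainingEndRequest above.
    const raw = this.storageSettings.values.topology as string | undefined;
    if (raw) {
      try {
        this.trackingEngine?.setTopology(JSON.parse(raw)); // hypothetical setter
      } catch (e) {
        this.console.warn('Failed to parse persisted topology:', e);
      }
    }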