npm - @blueharford/scrypted-spatial-awareness - Versions diffs - 0.4.7 → 0.4.8-beta.1 - Mend

@blueharford/scrypted-spatial-awareness 0.4.7 → 0.4.8-beta.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +62 -0
package/dist/main.nodejs.js +1 -1
package/dist/main.nodejs.js.map +1 -1
package/dist/plugin.zip +0 -0
package/out/main.nodejs.js +554 -137
package/out/main.nodejs.js.map +1 -1
package/out/plugin.zip +0 -0
package/package.json +1 -1
package/src/core/object-correlator.ts +32 -7
package/src/core/spatial-reasoning.ts +315 -44
package/src/core/tracking-engine.ts +57 -19
package/src/models/alert.ts +41 -14

package/out/plugin.zip ADDED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blueharford/scrypted-spatial-awareness",
-  "version": "0.4.7",
+  "version": "0.4.8-beta.1",
   "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
   "author": "Joshua Seidel <blueharford>",
   "license": "Apache-2.0",

package/src/core/object-correlator.ts CHANGED Viewed

@@ -46,7 +46,24 @@ export class ObjectCorrelator {
       }
     }
-    if (candidates.length === 0) return null;
+    if (candidates.length === 0) {
+      // No candidates above threshold - try to find best match with relaxed criteria
+      // This helps when there's only one object of this class active
+      const sameClassObjects = activeObjects.filter(
+        o => o.className === sighting.detection.className
+      );
+      if (sameClassObjects.length === 1) {
+        // Only one object of this class - likely the same one
+        const candidate = await this.evaluateCandidate(sameClassObjects[0], sighting);
+        // Accept with lower threshold if timing is reasonable
+        if (candidate.confidence >= 0.3 && candidate.factors.timing > 0) {
+          return candidate;
+        }
+      }
+      return null;
+    }
     // Sort by confidence (highest first)
     candidates.sort((a, b) => b.confidence - a.confidence);
@@ -137,15 +154,23 @@ export class ObjectCorrelator {
     if (!connection) {
       // No defined connection - still allow correlation based on reasonable timing
-      // Allow up to 2 minutes transit between any cameras
-      const MAX_UNCHARTED_TRANSIT = 120000; // 2 minutes
+      // Allow up to 5 minutes transit between any cameras (property could be large)
+      const MAX_UNCHARTED_TRANSIT = 300000; // 5 minutes
       if (transitTime > 0 && transitTime < MAX_UNCHARTED_TRANSIT) {
         // Score based on how reasonable the timing is
-        // Shorter transits are more likely to be the same object
-        const timingScore = Math.max(0.3, 1 - (transitTime / MAX_UNCHARTED_TRANSIT));
-        return timingScore;
+        // Give higher base score for reasonable transits (encourages matching)
+        if (transitTime < 60000) {
+          // Under 1 minute - very likely same object
+          return 0.9;
+        } else if (transitTime < 120000) {
+          // Under 2 minutes - probably same object
+          return 0.7;
+        } else {
+          // 2-5 minutes - possible but less certain
+          return Math.max(0.4, 0.7 - (transitTime - 120000) / 180000 * 0.3);
+        }
       }
-      return 0.2;
+      return 0.3; // Even long transits get some credit
     }
     const { min, typical, max } = connection.transitTime;

package/src/core/spatial-reasoning.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import sdk, {
   ObjectDetection,
   Camera,
   MediaObject,
+  ScryptedDevice,
 } from '@scrypted/sdk';
 import {
   CameraTopology,
@@ -61,11 +62,17 @@ interface ContextChunk {
   metadata: Record<string, any>;
 }
+/** Interface for ChatCompletion devices (from @scrypted/llm plugin) */
+interface ChatCompletionDevice extends ScryptedDevice {
+  getChatCompletion?(params: any): Promise<any>;
+  streamChatCompletion?(params: any): AsyncGenerator<any>;
+}
 export class SpatialReasoningEngine {
   private config: SpatialReasoningConfig;
   private console: Console;
   private topology: CameraTopology | null = null;
-  private llmDevice: ObjectDetection | null = null;
+  private llmDevice: ChatCompletionDevice | null = null;
   private contextChunks: ContextChunk[] = [];
   private topologyContextCache: string | null = null;
   private contextCacheTime: number = 0;
@@ -303,30 +310,213 @@ export class SpatialReasoningEngine {
     return relevant;
   }
-  /** Find or initialize LLM device */
-  private async findLlmDevice(): Promise<ObjectDetection | null> {
+  private llmSearched: boolean = false;
+  private llmProvider: string | null = null;
+  /** Find or initialize LLM device - looks for ChatCompletion interface from @scrypted/llm plugin */
+  private async findLlmDevice(): Promise<ChatCompletionDevice | null> {
     if (this.llmDevice) return this.llmDevice;
+    if (this.llmSearched) return null; // Already searched and found nothing
+    this.llmSearched = true;
     try {
+      // Look for devices with ChatCompletion interface (the correct interface for @scrypted/llm)
       for (const id of Object.keys(systemManager.getSystemState())) {
         const device = systemManager.getDeviceById(id);
-        if (device?.interfaces?.includes(ScryptedInterface.ObjectDetection)) {
-          const name = device.name?.toLowerCase() || '';
-          if (name.includes('llm') || name.includes('gpt') || name.includes('claude') ||
-              name.includes('ollama') || name.includes('gemini')) {
-            this.llmDevice = device as unknown as ObjectDetection;
-            this.console.log(`Found LLM device: ${device.name}`);
-            return this.llmDevice;
+        if (!device) continue;
+        // Check if this device has ChatCompletion interface
+        // The @scrypted/llm plugin exposes ChatCompletion, not ObjectDetection
+        if (device.interfaces?.includes('ChatCompletion')) {
+          const deviceName = device.name?.toLowerCase() || '';
+          const pluginId = (device as any).pluginId?.toLowerCase() || '';
+          // Identify the provider type for logging
+          let providerType = 'Unknown';
+          if (pluginId.includes('@scrypted/llm') || pluginId.includes('llm')) {
+            providerType = 'Scrypted LLM';
           }
+          if (deviceName.includes('openai') || deviceName.includes('gpt')) {
+            providerType = 'OpenAI';
+          } else if (deviceName.includes('anthropic') || deviceName.includes('claude')) {
+            providerType = 'Anthropic';
+          } else if (deviceName.includes('ollama')) {
+            providerType = 'Ollama';
+          } else if (deviceName.includes('gemini') || deviceName.includes('google')) {
+            providerType = 'Google';
+          } else if (deviceName.includes('llama')) {
+            providerType = 'llama.cpp';
+          }
+          this.llmDevice = device as unknown as ChatCompletionDevice;
+          this.llmProvider = `${providerType} (${device.name})`;
+          this.console.log(`[LLM] Connected to ${providerType}: ${device.name}`);
+          this.console.log(`[LLM] Plugin: ${pluginId || 'N/A'}`);
+          this.console.log(`[LLM] Interfaces: ${device.interfaces?.join(', ')}`);
+          return this.llmDevice;
         }
       }
+      // If we get here, no LLM plugin found
+      this.console.warn('[LLM] No ChatCompletion device found. Install @scrypted/llm for enhanced descriptions.');
+      this.console.warn('[LLM] Falling back to rule-based descriptions using topology data.');
     } catch (e) {
-      this.console.warn('Error finding LLM device:', e);
+      this.console.error('[LLM] Error searching for LLM device:', e);
     }
     return null;
   }
+  /** Get the current LLM provider name */
+  getLlmProvider(): string | null {
+    return this.llmProvider;
+  }
+  /** Check if LLM is available */
+  isLlmAvailable(): boolean {
+    return this.llmDevice !== null;
+  }
+  /** Generate entry description when object enters property */
+  generateEntryDescription(
+    tracked: TrackedObject,
+    cameraId: string
+  ): SpatialReasoningResult {
+    if (!this.topology) {
+      return {
+        description: `${this.capitalizeFirst(tracked.className)} entered property`,
+        involvedLandmarks: [],
+        confidence: 0.5,
+        usedLlm: false,
+      };
+    }
+    const camera = findCamera(this.topology, cameraId);
+    if (!camera) {
+      return {
+        description: `${this.capitalizeFirst(tracked.className)} entered property`,
+        involvedLandmarks: [],
+        confidence: 0.5,
+        usedLlm: false,
+      };
+    }
+    const landmarks = getLandmarksVisibleFromCamera(this.topology, cameraId);
+    const objectType = this.capitalizeFirst(tracked.className);
+    // Build entry description using topology context
+    const location = this.describeLocation(camera, landmarks, 'to');
+    // Check if we can determine where they came from (e.g., street, neighbor)
+    const entryLandmark = landmarks.find(l => l.isEntryPoint);
+    const streetLandmark = landmarks.find(l => l.type === 'street');
+    const neighborLandmark = landmarks.find(l => l.type === 'neighbor');
+    let source = '';
+    if (streetLandmark) {
+      source = ` from ${streetLandmark.name}`;
+    } else if (neighborLandmark) {
+      source = ` from ${neighborLandmark.name}`;
+    }
+    return {
+      description: `${objectType} arrived at ${location}${source}`,
+      involvedLandmarks: landmarks,
+      confidence: 0.8,
+      usedLlm: false,
+    };
+  }
+  /** Generate exit description when object leaves property */
+  generateExitDescription(
+    tracked: TrackedObject,
+    cameraId: string
+  ): SpatialReasoningResult {
+    if (!this.topology) {
+      return {
+        description: `${this.capitalizeFirst(tracked.className)} left property`,
+        involvedLandmarks: [],
+        confidence: 0.5,
+        usedLlm: false,
+      };
+    }
+    const camera = findCamera(this.topology, cameraId);
+    if (!camera) {
+      return {
+        description: `${this.capitalizeFirst(tracked.className)} left property`,
+        involvedLandmarks: [],
+        confidence: 0.5,
+        usedLlm: false,
+      };
+    }
+    const landmarks = getLandmarksVisibleFromCamera(this.topology, cameraId);
+    const objectType = this.capitalizeFirst(tracked.className);
+    // Build exit description
+    const location = this.describeLocation(camera, landmarks, 'from');
+    // Check for exit point landmarks
+    const exitLandmark = landmarks.find(l => l.isExitPoint);
+    const streetLandmark = landmarks.find(l => l.type === 'street');
+    let destination = '';
+    if (streetLandmark) {
+      destination = ` towards ${streetLandmark.name}`;
+    } else if (exitLandmark) {
+      destination = ` via ${exitLandmark.name}`;
+    }
+    // Include time on property if available
+    const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
+    let timeContext = '';
+    if (dwellTime > 60) {
+      timeContext = ` after ${Math.round(dwellTime / 60)}m on property`;
+    } else if (dwellTime > 10) {
+      timeContext = ` after ${dwellTime}s`;
+    }
+    // Summarize journey if they visited multiple cameras (use landmarks from topology)
+    let journeyContext = '';
+    if (tracked.journey.length > 0 && this.topology) {
+      const visitedLandmarks: string[] = [];
+      // Get landmarks from entry camera
+      if (tracked.entryCamera) {
+        const entryLandmarks = getLandmarksVisibleFromCamera(this.topology, tracked.entryCamera);
+        const entryLandmark = entryLandmarks.find(l => l.isEntryPoint || l.type === 'access') || entryLandmarks[0];
+        if (entryLandmark) {
+          visitedLandmarks.push(entryLandmark.name);
+        }
+      }
+      // Get landmarks from journey segments
+      for (const segment of tracked.journey) {
+        const segmentLandmarks = getLandmarksVisibleFromCamera(this.topology, segment.toCameraId);
+        const segmentLandmark = segmentLandmarks.find(l =>
+          !visitedLandmarks.includes(l.name) && (l.type === 'access' || l.type === 'zone' || l.type === 'structure')
+        );
+        if (segmentLandmark && !visitedLandmarks.includes(segmentLandmark.name)) {
+          visitedLandmarks.push(segmentLandmark.name);
+        }
+      }
+      if (visitedLandmarks.length > 1) {
+        journeyContext = ` — visited ${visitedLandmarks.join(' → ')}`;
+      }
+    }
+    return {
+      description: `${objectType} left ${location}${destination}${timeContext}${journeyContext}`,
+      involvedLandmarks: landmarks,
+      confidence: 0.8,
+      usedLlm: false,
+    };
+  }
   /** Generate rich movement description using LLM */
   async generateMovementDescription(
     tracked: TrackedObject,
@@ -415,28 +605,92 @@ export class SpatialReasoningEngine {
     const objectType = this.capitalizeFirst(tracked.className);
     const transitSecs = Math.round(transitTime / 1000);
-    // Build origin description
-    let origin = fromCamera.name;
-    if (fromLandmarks.length > 0) {
-      const nearLandmark = fromLandmarks[0];
-      origin = `near ${nearLandmark.name}`;
-    } else if (fromCamera.context?.coverageDescription) {
-      origin = fromCamera.context.coverageDescription.split('.')[0];
-    }
+    // Get connection for path context
+    const connection = this.topology ? findConnection(this.topology, fromCamera.deviceId, toCamera.deviceId) : null;
+    // Build origin description using landmarks, camera context, or camera name
+    let origin = this.describeLocation(fromCamera, fromLandmarks, 'from');
     // Build destination description
-    let destination = toCamera.name;
-    if (toLandmarks.length > 0) {
-      const nearLandmark = toLandmarks[0];
-      destination = `towards ${nearLandmark.name}`;
-    } else if (toCamera.context?.coverageDescription) {
-      destination = `towards ${toCamera.context.coverageDescription.split('.')[0]}`;
+    let destination = this.describeLocation(toCamera, toLandmarks, 'to');
+    // Check if we have a named path/connection
+    let pathContext = '';
+    if (connection?.name) {
+      pathContext = ` via ${connection.name}`;
+    } else if (connection?.pathLandmarks?.length && this.topology) {
+      const pathNames = connection.pathLandmarks
+        .map(id => findLandmark(this.topology!, id)?.name)
+        .filter(Boolean);
+      if (pathNames.length > 0) {
+        pathContext = ` past ${pathNames.join(' and ')}`;
+      }
+    }
+    // Include journey context if this is not the first camera
+    let journeyContext = '';
+    if (tracked.journey.length > 0) {
+      const totalTime = Math.round((Date.now() - tracked.firstSeen) / 1000);
+      if (totalTime > 60) {
+        journeyContext = ` (${Math.round(totalTime / 60)}m on property)`;
+      }
+    }
+    // Determine movement verb based on transit time and object type
+    const verb = this.getMovementVerb(tracked.className, transitSecs);
+    return `${objectType} ${verb} ${origin} heading ${destination}${pathContext}${journeyContext}`;
+  }
+  /** Describe a location using landmarks, camera context, or camera name */
+  private describeLocation(camera: CameraNode, landmarks: Landmark[], direction: 'from' | 'to'): string {
+    // Priority 1: Use entry/exit landmarks
+    const entryExitLandmark = landmarks.find(l =>
+      (direction === 'from' && l.isExitPoint) || (direction === 'to' && l.isEntryPoint)
+    );
+    if (entryExitLandmark) {
+      return direction === 'from' ? `the ${entryExitLandmark.name}` : `the ${entryExitLandmark.name}`;
+    }
+    // Priority 2: Use access landmarks (driveway, walkway, etc.)
+    const accessLandmark = landmarks.find(l => l.type === 'access');
+    if (accessLandmark) {
+      return `the ${accessLandmark.name}`;
     }
-    // Build transit string
-    const transitStr = transitSecs > 0 ? ` (${transitSecs}s)` : '';
+    // Priority 3: Use zone landmarks (front yard, back yard)
+    const zoneLandmark = landmarks.find(l => l.type === 'zone');
+    if (zoneLandmark) {
+      return `the ${zoneLandmark.name}`;
+    }
+    // Priority 4: Use any landmark
+    if (landmarks.length > 0) {
+      return `near ${landmarks[0].name}`;
+    }
-    return `${objectType} moving from ${origin} ${destination}${transitStr}`;
+    // Priority 5: Use camera coverage description
+    if (camera.context?.coverageDescription) {
+      const desc = camera.context.coverageDescription.split('.')[0].toLowerCase();
+      return `the ${desc}`;
+    }
+    // Fallback: Generic description (no camera name inference - use topology for context)
+    return direction === 'from' ? 'property' : 'property';
+  }
+  /** Get appropriate movement verb based on context */
+  private getMovementVerb(className: string, transitSecs: number): string {
+    if (className === 'car' || className === 'vehicle' || className === 'truck') {
+      return transitSecs < 10 ? 'driving from' : 'moved from';
+    }
+    if (transitSecs < 5) {
+      return 'walking from';
+    }
+    if (transitSecs < 30) {
+      return 'moved from';
+    }
+    return 'traveled from';
   }
   /** Build path description from connection */
@@ -458,7 +712,7 @@ export class SpatialReasoningEngine {
     return connection.name || undefined;
   }
-  /** Get LLM-enhanced description */
+  /** Get LLM-enhanced description using ChatCompletion interface */
   private async getLlmEnhancedDescription(
     tracked: TrackedObject,
     fromCamera: CameraNode,
@@ -469,7 +723,7 @@ export class SpatialReasoningEngine {
     mediaObject: MediaObject
   ): Promise<string | null> {
     const llm = await this.findLlmDevice();
-    if (!llm) return null;
+    if (!llm || !llm.getChatCompletion) return null;
     try {
       // Retrieve relevant context for RAG
@@ -492,14 +746,22 @@ export class SpatialReasoningEngine {
         ragContext
       );
-      // Call LLM
-      const result = await llm.detectObjects(mediaObject, {
-        settings: { prompt }
-      } as any);
+      // Call LLM using ChatCompletion interface
+      const result = await llm.getChatCompletion({
+        messages: [
+          {
+            role: 'user',
+            content: prompt,
+          },
+        ],
+        max_tokens: 150,
+        temperature: 0.7,
+      });
-      // Extract description from result
-      if (result.detections?.[0]?.label) {
-        return result.detections[0].label;
+      // Extract description from ChatCompletion result
+      const content = result?.choices?.[0]?.message?.content;
+      if (content && typeof content === 'string') {
+        return content.trim();
       }
       return null;
@@ -547,7 +809,7 @@ Examples of good descriptions:
 Generate ONLY the description, nothing else:`;
   }
-  /** Suggest a new landmark based on AI analysis */
+  /** Suggest a new landmark based on AI analysis using ChatCompletion */
   async suggestLandmark(
     cameraId: string,
     mediaObject: MediaObject,
@@ -557,7 +819,7 @@ Generate ONLY the description, nothing else:`;
     if (!this.config.enableLandmarkLearning) return null;
     const llm = await this.findLlmDevice();
-    if (!llm) return null;
+    if (!llm || !llm.getChatCompletion) return null;
     try {
       const prompt = `Analyze this security camera image. A ${objectClass} was detected.
@@ -573,13 +835,22 @@ If you can identify a clear landmark feature, respond with ONLY a JSON object:
 If no clear landmark is identifiable, respond with: {"name": null}`;
-      const result = await llm.detectObjects(mediaObject, {
-        settings: { prompt }
-      } as any);
+      // Call LLM using ChatCompletion interface
+      const result = await llm.getChatCompletion({
+        messages: [
+          {
+            role: 'user',
+            content: prompt,
+          },
+        ],
+        max_tokens: 100,
+        temperature: 0.3,
+      });
-      if (result.detections?.[0]?.label) {
+      const content = result?.choices?.[0]?.message?.content;
+      if (content && typeof content === 'string') {
         try {
-          const parsed = JSON.parse(result.detections[0].label);
+          const parsed = JSON.parse(content.trim());
           if (parsed.name && parsed.type) {
             const suggestionId = `suggest_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

package/src/core/tracking-engine.ts CHANGED Viewed

@@ -516,31 +516,61 @@ export class TrackingEngine {
         `(ID: ${globalId.slice(0, 8)})`
       );
-      // Generate entry alert if this is an entry point
-      // Entry alerts also respect loitering threshold and cooldown
-      if (isEntryPoint && this.passesLoiteringThreshold(tracked) && !this.isInAlertCooldown(globalId)) {
-        // Get spatial reasoning for entry event
-        const spatialResult = await this.getSpatialDescription(
-          tracked,
-          'outside', // Virtual "outside" location for entry
-          sighting.cameraId,
-          0,
-          sighting.cameraId
-        );
+      // Schedule loitering check - alert after object passes loitering threshold
+      // This ensures we don't miss alerts for brief appearances while still filtering noise
+      this.scheduleLoiteringAlert(globalId, sighting, isEntryPoint);
+    }
+  }
+  /** Schedule an alert after loitering threshold passes */
+  private scheduleLoiteringAlert(
+    globalId: GlobalTrackingId,
+    sighting: ObjectSighting,
+    isEntryPoint: boolean
+  ): void {
+    // Check after loitering threshold if object is still being tracked
+    setTimeout(async () => {
+      const tracked = this.state.getObject(globalId);
+      if (!tracked || tracked.state !== 'active') return;
+      // Check if we've already alerted for this object
+      if (this.isInAlertCooldown(globalId)) return;
+      // Generate spatial description
+      const spatialResult = this.spatialReasoning.generateEntryDescription(
+        tracked,
+        sighting.cameraId
+      );
+      if (isEntryPoint) {
+        // Entry point - generate property entry alert
         await this.alertManager.checkAndAlert('property_entry', tracked, {
           cameraId: sighting.cameraId,
           cameraName: sighting.cameraName,
           objectClass: sighting.detection.className,
-          objectLabel: spatialResult?.description || sighting.detection.label,
+          objectLabel: spatialResult.description,
           detectionId: sighting.detectionId,
-          involvedLandmarks: spatialResult?.involvedLandmarks?.map(l => l.name),
-          usedLlm: spatialResult?.usedLlm,
+          involvedLandmarks: spatialResult.involvedLandmarks?.map(l => l.name),
+          usedLlm: spatialResult.usedLlm,
+        });
+      } else {
+        // Non-entry point - still alert about activity using movement alert type
+        // This notifies about any activity around the property using topology context
+        await this.alertManager.checkAndAlert('movement', tracked, {
+          cameraId: sighting.cameraId,
+          cameraName: sighting.cameraName,
+          toCameraId: sighting.cameraId,
+          toCameraName: sighting.cameraName,
+          objectClass: sighting.detection.className,
+          objectLabel: spatialResult.description, // Use spatial reasoning description (topology-based)
+          detectionId: sighting.detectionId,
+          involvedLandmarks: spatialResult.involvedLandmarks?.map(l => l.name),
+          usedLlm: spatialResult.usedLlm,
         });
-        this.recordAlertTime(globalId);
       }
-    }
+      this.recordAlertTime(globalId);
+    }, this.config.loiteringThreshold);
   }
   /** Attempt to correlate a sighting with existing tracked objects */
@@ -596,15 +626,23 @@ export class TrackingEngine {
       if (current && current.state === 'pending') {
         this.state.markExited(tracked.globalId, sighting.cameraId, sighting.cameraName);
+        // Generate rich exit description using topology context
+        const spatialResult = this.spatialReasoning.generateExitDescription(
+          current,
+          sighting.cameraId
+        );
         this.console.log(
-          `Object ${tracked.globalId.slice(0, 8)} exited via ${sighting.cameraName}`
+          `Object ${tracked.globalId.slice(0, 8)} exited: ${spatialResult.description}`
         );
         await this.alertManager.checkAndAlert('property_exit', current, {
           cameraId: sighting.cameraId,
           cameraName: sighting.cameraName,
           objectClass: current.className,
-          objectLabel: current.label,
+          objectLabel: spatialResult.description,
+          involvedLandmarks: spatialResult.involvedLandmarks?.map(l => l.name),
+          usedLlm: spatialResult.usedLlm,
         });
       }
       this.pendingTimers.delete(tracked.globalId);