npm - @blueharford/scrypted-spatial-awareness - Versions diffs - 0.6.14 → 0.6.16 - Mend

@blueharford/scrypted-spatial-awareness 0.6.14 → 0.6.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/main.nodejs.js +1 -1
package/dist/main.nodejs.js.map +1 -1
package/dist/plugin.zip +0 -0
package/out/main.nodejs.js +244 -40
package/out/main.nodejs.js.map +1 -1
package/out/plugin.zip +0 -0
package/package.json +1 -1
package/src/core/topology-discovery.ts +238 -40
package/src/models/discovery.ts +17 -0

package/out/plugin.zip CHANGED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blueharford/scrypted-spatial-awareness",
-  "version": "0.6.14",
+  "version": "0.6.16",
   "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
   "author": "Joshua Seidel <blueharford>",
   "license": "Apache-2.0",

package/src/core/topology-discovery.ts CHANGED Viewed

@@ -23,6 +23,8 @@ import {
   DiscoveryStatus,
   DEFAULT_DISCOVERY_STATUS,
   RATE_LIMIT_WARNING_THRESHOLD,
+  DistanceEstimate,
+  distanceToFeet,
 } from '../models/discovery';
 import {
   CameraTopology,
@@ -40,60 +42,141 @@ interface ChatCompletionDevice extends ScryptedDevice {
 }
 /** Scene analysis prompt for single camera */
-const SCENE_ANALYSIS_PROMPT = `Analyze this security camera image and identify what you see.
+const SCENE_ANALYSIS_PROMPT = `You are analyzing a security camera image. Describe EVERYTHING you can see in detail.
+## INSTRUCTIONS
+Look at this image carefully and identify ALL visible objects, structures, and areas. Be thorough - even small or partially visible items are important for security awareness.
+## 1. LANDMARKS - List EVERY distinct object or feature you can see:
+**Structures** (buildings, parts of buildings):
+- Houses, garages, sheds, porches, decks, patios, carports, gazebos
+- Walls, pillars, columns, railings, stairs, steps
+**Vegetation** (plants, trees, landscaping):
+- Trees (describe type if identifiable: oak, palm, pine, etc.)
+- Bushes, shrubs, hedges
+- Flower beds, gardens, planters, potted plants
+- Grass/lawn areas, mulch beds
+**Boundaries & Barriers**:
+- Fences (wood, chain-link, aluminum, vinyl, iron, privacy)
+- Walls (brick, stone, concrete, retaining)
+- Gates, gate posts
+- Hedges used as boundaries
+**Access Points & Pathways**:
+- Doors (front, side, garage, screen)
+- Driveways (concrete, asphalt, gravel, pavers)
+- Walkways, sidewalks, paths, stepping stones
+- Stairs, ramps, porches
+**Utility & Fixtures**:
+- Mailboxes, package boxes
+- Light fixtures, lamp posts, solar lights
+- A/C units, utility boxes, meters
+- Trash cans, recycling bins
+- Hoses, spigots, sprinklers
+**Outdoor Items**:
+- Vehicles (cars, trucks, motorcycles, boats, trailers)
+- Furniture (chairs, tables, benches, swings)
+- Grills, fire pits, outdoor kitchens
+- Play equipment, trampolines, pools
+- Decorations, flags, signs
+**Off-Property Elements** (important for security context):
+- Street, road, sidewalk
+- Neighbor's property/fence/house
+- Public areas visible
+For EACH landmark, estimate its DISTANCE from the camera:
+- "close" = 0-10 feet (within arm's reach of camera)
+- "near" = 10-30 feet
+- "medium" = 30-60 feet
+- "far" = 60-100 feet
+- "distant" = 100+ feet (edge of property or beyond)
+## 2. ZONES - Identify distinct AREAS visible:
+- Front yard, backyard, side yard
+- Driveway, parking area
+- Patio, deck, porch
+- Garden area, lawn
+- Street/road
+- Neighbor's yard
+For each zone, estimate what percentage of the image it covers (0.0 to 1.0).
+## 3. EDGES - What's at each edge of the frame:
+This helps understand what's just out of view.
+## 4. CAMERA CONTEXT:
+- Estimated mounting height (ground level, 8ft, 12ft, roofline, etc.)
+- Approximate field of view (narrow, medium, wide)
+- Facing direction if determinable (north, south, street-facing, etc.)
-1. LANDMARKS - Identify fixed features visible:
-   - Structures (house, garage, shed, porch, deck)
-   - Features (mailbox, tree, pool, garden, fountain)
-   - Access points (door, gate, driveway entrance, walkway)
-   - Boundaries (fence, wall, hedge)
-2. ZONES - Identify area types visible:
-   - What type of area is this? (front yard, backyard, driveway, street, patio, walkway)
-   - Estimate what percentage of the frame each zone covers (0.0 to 1.0)
-3. EDGES - What's visible at the frame edges:
-   - Top edge: (sky, roof, trees, etc.)
-   - Left edge: (fence, neighbor, street, etc.)
-   - Right edge: (fence, garage, etc.)
-   - Bottom edge: (ground, driveway, grass, etc.)
-4. ORIENTATION - Estimate camera facing direction based on shadows, sun position, or landmarks
-Respond with ONLY valid JSON in this exact format:
+Respond with ONLY valid JSON:
 {
   "landmarks": [
-    {"name": "Front Door", "type": "access", "confidence": 0.9, "description": "White front door with black frame"}
+    {"name": "Mailbox", "type": "feature", "distance": "medium", "confidence": 0.95, "description": "Black metal mailbox on wooden post, approximately 40 feet from camera"},
+    {"name": "Aluminum Fence", "type": "boundary", "distance": "near", "confidence": 0.9, "description": "Silver aluminum fence running along left side of property, about 15-20 feet away"},
+    {"name": "Large Oak Tree", "type": "feature", "distance": "far", "confidence": 0.85, "description": "Mature oak tree near property line, roughly 80 feet from camera"}
   ],
   "zones": [
-    {"name": "Front Yard", "type": "yard", "coverage": 0.4, "description": "Grass lawn area"}
+    {"name": "Front Yard", "type": "yard", "coverage": 0.5, "description": "Grass lawn with some bare patches"},
+    {"name": "Driveway", "type": "driveway", "coverage": 0.25, "description": "Concrete driveway leading to garage"}
   ],
-  "edges": {"top": "sky with clouds", "left": "fence and trees", "right": "garage wall", "bottom": "concrete walkway"},
-  "orientation": "north"
-}`;
+  "edges": {
+    "top": "sky, tree canopy",
+    "left": "aluminum fence, neighbor's yard beyond",
+    "right": "side of house, garage door",
+    "bottom": "concrete walkway, grass edge"
+  },
+  "cameraContext": {
+    "mountHeight": "8 feet",
+    "fieldOfView": "wide",
+    "facingDirection": "street-facing"
+  }
+}
+BE THOROUGH. List every distinct item you can identify. A typical outdoor scene should have 5-15+ landmarks.`;
 /** Multi-camera correlation prompt */
-const CORRELATION_PROMPT = `I have scene analyses from multiple security cameras at the same property. Help me correlate them to understand the property layout.
+const CORRELATION_PROMPT = `I have detailed scene analyses from multiple security cameras at the same property. Help me understand which landmarks appear in multiple camera views.
 CAMERA SCENES:
 {scenes}
-Identify:
-1. Shared landmarks - Features that appear in multiple camera views
-2. Camera connections - How someone could move between camera views and estimated walking time
-3. Overall layout - Describe the property layout based on what you see
+## PRIORITY ORDER (most important first):
+### 1. SHARED LANDMARKS (HIGHEST PRIORITY)
+Identify features that are visible from MULTIPLE cameras. This is crucial for understanding the property layout.
+- Look for the SAME fence, tree, mailbox, driveway, structure, etc. appearing in different camera views
+- Even partial visibility counts (e.g., a tree visible in full from one camera and just the edge from another)
+- Include landmarks that are at the boundary between camera views
+### 2. PROPERTY LAYOUT
+Based on what each camera sees and their overlapping features, describe:
+- Which areas each camera covers
+- How the cameras relate spatially (e.g., "Camera A looks toward Camera B's direction")
+- Overall property shape and features
+### 3. CONNECTIONS (Lower Priority)
+Only if clearly determinable, suggest walking paths between camera views.
 IMPORTANT: For camera references, use the EXACT device ID shown in parentheses (e.g., "device_123"), NOT the camera name.
 Respond with ONLY valid JSON:
 {
   "sharedLandmarks": [
-    {"name": "Driveway", "type": "access", "seenByCameras": ["device_123", "device_456"], "confidence": 0.8, "description": "Concrete driveway"}
+    {"name": "Aluminum Fence", "type": "boundary", "seenByCameras": ["device_123", "device_456"], "confidence": 0.85, "description": "Silver aluminum fence visible on right edge of Camera A and left edge of Camera B"},
+    {"name": "Large Oak Tree", "type": "feature", "seenByCameras": ["device_123", "device_789"], "confidence": 0.9, "description": "Mature oak tree in front yard, visible from both front and side cameras"},
+    {"name": "Concrete Driveway", "type": "access", "seenByCameras": ["device_123", "device_456", "device_789"], "confidence": 0.95, "description": "Driveway visible from multiple angles"}
   ],
   "connections": [
-    {"from": "device_123", "to": "device_456", "transitSeconds": 10, "via": "driveway", "confidence": 0.7, "bidirectional": true}
+    {"from": "device_123", "to": "device_456", "transitSeconds": 8, "via": "along driveway", "confidence": 0.6, "bidirectional": true}
   ],
-  "layoutDescription": "Single-story house with front yard facing street, driveway on the left side, backyard accessible through side gate"
+  "layoutDescription": "Ranch-style house. Front camera covers front yard and street. Garage camera covers driveway entrance. Side camera covers side yard with aluminum fence separating from neighbor. Backyard camera shows deck and pool area."
 }`;
 export class TopologyDiscoveryEngine {
@@ -307,7 +390,7 @@ export class TopologyDiscoveryEngine {
               ],
             },
           ],
-          max_tokens: 1500,
+          max_tokens: 4000, // Increased for detailed scene analysis
           temperature: 0.3,
         });
@@ -320,7 +403,8 @@ export class TopologyDiscoveryEngine {
               jsonStr = jsonStr.replace(/```json?\n?/g, '').replace(/```$/g, '').trim();
             }
-            const parsed = JSON.parse(jsonStr);
+            // Try to recover truncated JSON
+            const parsed = this.parseJsonWithRecovery(jsonStr, cameraName);
             // Map parsed data to our types
             if (Array.isArray(parsed.landmarks)) {
@@ -328,6 +412,7 @@ export class TopologyDiscoveryEngine {
                 name: l.name || 'Unknown',
                 type: this.mapLandmarkType(l.type),
                 confidence: typeof l.confidence === 'number' ? l.confidence : 0.7,
+                distance: this.mapDistance(l.distance),
                 description: l.description || '',
                 boundingBox: l.boundingBox,
               }));
@@ -449,6 +534,110 @@ export class TopologyDiscoveryEngine {
     return 'unknown';
   }
+  /**
+   * Map the free-form distance label returned by the LLM onto a DistanceEstimate bucket.
+   *
+   * Matching is case-insensitive and substring-based, tested in the order
+   * close, near, medium, far, distant; the first keyword found wins. A
+   * null/undefined value, or a string containing none of the keywords,
+   * yields 'medium'.
+   *
+   * NOTE(review): the optional chaining only guards null/undefined. A
+   * non-string value (for example a number) has no toLowerCase method and
+   * would throw a TypeError here - confirm whether the parsed LLM JSON is
+   * validated before it reaches this method.
+   *
+   * @param distance Distance label taken from the LLM response.
+   * @returns The matching bucket, or 'medium' when nothing matches.
+   */
+  private mapDistance(distance: string): DistanceEstimate {
+    const dist = distance?.toLowerCase();
+    if (dist?.includes('close')) return 'close';
+    if (dist?.includes('near')) return 'near';
+    if (dist?.includes('medium')) return 'medium';
+    if (dist?.includes('far') && !dist?.includes('distant')) return 'far'; // a label containing both "far" and "distant" falls through to 'distant'
+    if (dist?.includes('distant')) return 'distant';
+    return 'medium'; // Default to medium if not specified
+  }
+  /**
+   * Parse an LLM response as JSON, attempting best-effort recovery when the
+   * text is truncated or malformed.
+   *
+   * Three stages are tried in order:
+   *  1. Plain JSON.parse of the whole string. On failure the first 500
+   *     characters are logged and processing continues.
+   *  2. Structural repair: optionally cut the text back to the last "},"
+   *     boundary, then append as many "]" and "}" as are needed to balance
+   *     the counts of opening brackets and braces, and parse again.
+   *  3. Regex extraction of the "landmarks" and "zones" arrays, parsing each
+   *     flat {...} object on its own and dropping any that fail.
+   *
+   * NOTE(review): stage 2 counts every brace/bracket character, including
+   * ones that appear inside string values, and appends all "]" before all
+   * "}" regardless of nesting order. Input truncated inside an object that
+   * is nested in an array (e.g. {"a": [{"b": 1) therefore still fails to
+   * parse and falls through to stage 3.
+   *
+   * @param jsonStr Raw JSON text from the LLM.
+   * @param context Label interpolated into log messages.
+   * @returns The parsed value from stage 1 or 2, or a stage 3 object with
+   *          landmarks, zones, an empty edges object and orientation 'unknown'.
+   * @throws Error when no stage yields a result.
+   */
+  private parseJsonWithRecovery(jsonStr: string, context: string): any {
+    // Stage 1: try a direct parse of the full response
+    try {
+      return JSON.parse(jsonStr);
+    } catch (e) {
+      // Log the raw response for debugging (first 500 chars); the parse error itself is not logged
+      this.console.log(`[Discovery] Raw LLM response for ${context} (first 500 chars): ${jsonStr.substring(0, 500)}...`);
+    }
+    // Stage 2: try to recover truncated JSON by trimming to a complete entry and balancing delimiters
+    try {
+      // Start from the full text; it is only shortened if the heuristic below fires
+      let recoveredJson = jsonStr;
+      // Heuristic for a cut-off string: the last quote appears after the last colon
+      const lastQuote = recoveredJson.lastIndexOf('"');
+      const lastColon = recoveredJson.lastIndexOf(':');
+      if (lastQuote > lastColon) {
+        // We might be in the middle of a string value; look for the last "}," before that quote
+        const beforeQuote = recoveredJson.substring(0, lastQuote);
+        const afterLastCompleteEntry = beforeQuote.lastIndexOf('},');
+        if (afterLastCompleteEntry > 0) {
+          recoveredJson = beforeQuote.substring(0, afterLastCompleteEntry + 1); // keep the "}" and drop the trailing comma
+        }
+      }
+      // Count delimiters so any unclosed arrays/objects can be closed (counts include characters inside strings)
+      let openBraces = (recoveredJson.match(/{/g) || []).length;
+      let closeBraces = (recoveredJson.match(/}/g) || []).length;
+      let openBrackets = (recoveredJson.match(/\[/g) || []).length;
+      let closeBrackets = (recoveredJson.match(/\]/g) || []).length;
+      // Add missing closing brackets first, then missing closing braces
+      while (closeBrackets < openBrackets) {
+        recoveredJson += ']';
+        closeBrackets++;
+      }
+      while (closeBraces < openBraces) {
+        recoveredJson += '}';
+        closeBraces++;
+      }
+      const recovered = JSON.parse(recoveredJson);
+      this.console.log(`[Discovery] Recovered truncated JSON for ${context}`);
+      return recovered;
+    } catch (recoveryError) {
+      // Stage 3 (last resort): pull out just the landmarks and zones arrays
+      try {
+        const landmarksMatch = jsonStr.match(/"landmarks"\s*:\s*\[([\s\S]*?)(?:\]|$)/); // lazy: captures up to the first "]" or end of input
+        const zonesMatch = jsonStr.match(/"zones"\s*:\s*\[([\s\S]*?)(?:\]|$)/);
+        const result: any = { landmarks: [], zones: [], edges: {}, orientation: 'unknown' };
+        if (landmarksMatch) {
+          // Parse each brace-delimited object without nested braces; unparseable ones are dropped
+          const landmarksStr = landmarksMatch[1];
+          const landmarkObjects = landmarksStr.match(/\{[^{}]*\}/g) || [];
+          result.landmarks = landmarkObjects.map((obj: string) => {
+            try {
+              return JSON.parse(obj);
+            } catch {
+              return null;
+            }
+          }).filter(Boolean);
+          this.console.log(`[Discovery] Extracted ${result.landmarks.length} landmarks from partial response for ${context}`);
+        }
+        if (zonesMatch) {
+          const zonesStr = zonesMatch[1];
+          const zoneObjects = zonesStr.match(/\{[^{}]*\}/g) || [];
+          result.zones = zoneObjects.map((obj: string) => {
+            try {
+              return JSON.parse(obj);
+            } catch {
+              return null;
+            }
+          }).filter(Boolean);
+          this.console.log(`[Discovery] Extracted ${result.zones.length} zones from partial response for ${context}`);
+        }
+        if (result.landmarks.length > 0 || result.zones.length > 0) {
+          return result;
+        }
+      } catch (extractError) {
+        // Give up on extraction; the warning and throw below report the failure
+      }
+      this.console.warn(`[Discovery] Could not recover JSON for ${context}`);
+      throw new Error(`Failed to parse LLM response: truncated or malformed JSON`);
+    }
+  }
   /** Resolve a camera reference (name or deviceId) to its deviceId */
   private resolveCameraRef(ref: string): string | null {
     if (!this.topology?.cameras || !ref) return null;
@@ -644,9 +833,14 @@ export class TopologyDiscoveryEngine {
   private generateSuggestionsFromAnalysis(analysis: SceneAnalysis): void {
     if (!analysis.isValid) return;
+    this.console.log(`[Discovery] Generating suggestions from ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones`);
     // Generate landmark suggestions
     for (const landmark of analysis.landmarks) {
       if (landmark.confidence >= this.config.minLandmarkConfidence) {
+        // Calculate distance in feet from distance estimate
+        const distanceFeet = landmark.distance ? distanceToFeet(landmark.distance) : 50;
         const suggestion: DiscoverySuggestion = {
           id: `landmark_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
           type: 'landmark',
@@ -659,27 +853,31 @@ export class TopologyDiscoveryEngine {
             type: landmark.type,
             description: landmark.description,
             visibleFromCameras: [analysis.cameraId],
-            // Include bounding box for positioning (will be used by applyDiscoverySuggestion)
+            // Include extra metadata for positioning
             boundingBox: landmark.boundingBox,
-          } as any, // boundingBox is extra metadata not in Landmark interface
+            distance: landmark.distance,
+            distanceFeet: distanceFeet,
+          } as any, // Extra metadata not in base Landmark interface
         };
         this.suggestions.set(suggestion.id, suggestion);
+        this.console.log(`[Discovery] Landmark suggestion: ${landmark.name} (${landmark.type}, ${landmark.distance || 'medium'}, ~${distanceFeet}ft)`);
       }
     }
-    // Generate zone suggestions
+    // Generate zone suggestions (even for smaller coverage - 10% is enough)
     for (const zone of analysis.zones) {
-      if (zone.coverage >= 0.2) {
+      if (zone.coverage >= 0.1) {
         const suggestion: DiscoverySuggestion = {
           id: `zone_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
           type: 'zone',
           timestamp: Date.now(),
           sourceCameras: [analysis.cameraId],
-          confidence: 0.7,
+          confidence: Math.min(0.9, 0.5 + zone.coverage), // Higher coverage = higher confidence
           status: 'pending',
           zone: zone,
         };
         this.suggestions.set(suggestion.id, suggestion);
+        this.console.log(`[Discovery] Zone suggestion: ${zone.name} (${zone.type}, ${Math.round(zone.coverage * 100)}% coverage)`);
       }
     }
   }

package/src/models/discovery.ts CHANGED Viewed

@@ -58,6 +58,21 @@ export interface DiscoveredZone {
   boundingBox?: [number, number, number, number];
 }
+/**
+ * Coarse distance estimate from the camera, expressed as one of five
+ * ordered buckets from nearest ('close') to farthest ('distant').
+ * Use distanceToFeet to turn a bucket into an approximate number of feet.
+ */
+export type DistanceEstimate = 'close' | 'near' | 'medium' | 'far' | 'distant';
+/**
+ * Convert a distance bucket to a single approximate value in feet.
+ *
+ * Each bounded bucket maps to a representative value inside the range noted
+ * beside it; 'distant' has no upper bound and maps to 150. A value outside
+ * the DistanceEstimate union (possible at runtime with unvalidated input)
+ * maps to 50.
+ *
+ * @param distance Distance bucket to convert.
+ * @returns Approximate distance from the camera in feet.
+ */
+export function distanceToFeet(distance: DistanceEstimate): number {
+  switch (distance) {
+    case 'close': return 5;      // 0-10 feet
+    case 'near': return 20;      // 10-30 feet
+    case 'medium': return 45;    // 30-60 feet
+    case 'far': return 80;       // 60-100 feet
+    case 'distant': return 150;  // 100+ feet
+    default: return 50; // fallback for values outside the union
+  }
+}
 /** A landmark discovered in a camera view */
 export interface DiscoveredLandmark {
   /** Name of the landmark */
@@ -66,6 +81,8 @@ export interface DiscoveredLandmark {
   type: LandmarkType;
   /** Confidence score from LLM (0-1) */
   confidence: number;
+  /** Estimated distance from camera */
+  distance?: DistanceEstimate;
   /** Bounding box in normalized coordinates [x, y, width, height] (0-1) */
   boundingBox?: [number, number, number, number];
   /** Description from LLM analysis */