@blueharford/scrypted-spatial-awareness 0.6.7 → 0.6.8
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +151 -11
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +147 -11
- package/src/core/tracking-engine.ts +34 -6
package/dist/plugin.zip
CHANGED
Binary file
package/out/main.nodejs.js
CHANGED
@@ -35497,7 +35497,7 @@ class SpatialReasoningEngine {
         return this.llmDevice !== null;
     }
     /** Generate entry description when object enters property */
-    generateEntryDescription(tracked, cameraId) {
+    async generateEntryDescription(tracked, cameraId, mediaObject) {
         if (!this.topology) {
             return {
                 description: `${this.capitalizeFirst(tracked.className)} entered property`,
@@ -35517,10 +35517,9 @@ class SpatialReasoningEngine {
         }
         const landmarks = (0, topology_1.getLandmarksVisibleFromCamera)(this.topology, cameraId);
         const objectType = this.capitalizeFirst(tracked.className);
-        // Build entry description using topology context
+        // Build basic entry description using topology context
         const location = this.describeLocation(camera, landmarks, 'to');
         // Check if we can determine where they came from (e.g., street, neighbor)
-        const entryLandmark = landmarks.find(l => l.isEntryPoint);
         const streetLandmark = landmarks.find(l => l.type === 'street');
         const neighborLandmark = landmarks.find(l => l.type === 'neighbor');
         let source = '';
@@ -35530,15 +35529,28 @@ class SpatialReasoningEngine {
         else if (neighborLandmark) {
             source = ` from ${neighborLandmark.name}`;
         }
+        const basicDescription = `${objectType} arrived at ${location}${source}`;
+        // Try LLM for enhanced description with visual details
+        if (this.config.enableLlm && mediaObject) {
+            const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'entry', mediaObject);
+            if (llmDescription) {
+                return {
+                    description: llmDescription,
+                    involvedLandmarks: landmarks,
+                    confidence: 0.9,
+                    usedLlm: true,
+                };
+            }
+        }
         return {
-            description:
+            description: basicDescription,
             involvedLandmarks: landmarks,
             confidence: 0.8,
             usedLlm: false,
         };
     }
     /** Generate exit description when object leaves property */
-    generateExitDescription(tracked, cameraId) {
+    async generateExitDescription(tracked, cameraId, mediaObject) {
         if (!this.topology) {
             return {
                 description: `${this.capitalizeFirst(tracked.className)} left property`,
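Note on the hunk above: the entry path now always builds the deterministic topology-based sentence first and only upgrades to the LLM output when `enableLlm` is set and a snapshot `mediaObject` was passed in. A minimal, self-contained TypeScript sketch of that fallback flow follows; the field names `description`, `confidence`, and `usedLlm` and the 0.9/0.8 confidence values come from the diff, everything else is illustrative and not the package's code.

// --- Illustrative sketch (not part of the package) ---
interface SpatialDescription {
    description: string;
    confidence: number;
    usedLlm: boolean;
}

async function describeWithFallback(
    basicDescription: string,
    llmEnabled: boolean,
    hasSnapshot: boolean,
    tryLlm: () => Promise<string | null>, // resolves to null if the LLM is unavailable or fails
): Promise<SpatialDescription> {
    // Only attempt the LLM when it is enabled AND a snapshot was captured.
    if (llmEnabled && hasSnapshot) {
        const enhanced = await tryLlm();
        if (enhanced)
            return { description: enhanced, confidence: 0.9, usedLlm: true };
    }
    // Otherwise fall back to the deterministic description at lower confidence.
    return { description: basicDescription, confidence: 0.8, usedLlm: false };
}

// Example: falls back cleanly when the LLM yields nothing.
describeWithFallback('Person arrived at front yard from the street', true, true, async () => null)
    .then(result => console.log(result.description, result.usedLlm)); // prints the basic description, false
// --- End sketch ---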
@@ -35558,7 +35570,7 @@ class SpatialReasoningEngine {
         }
         const landmarks = (0, topology_1.getLandmarksVisibleFromCamera)(this.topology, cameraId);
         const objectType = this.capitalizeFirst(tracked.className);
-        // Build exit description
+        // Build basic exit description
         const location = this.describeLocation(camera, landmarks, 'from');
         // Check for exit point landmarks
         const exitLandmark = landmarks.find(l => l.isExitPoint);
@@ -35603,8 +35615,21 @@ class SpatialReasoningEngine {
                 journeyContext = ` — visited ${visitedLandmarks.join(' → ')}`;
             }
         }
+        const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
+        // Try LLM for enhanced description with visual details
+        if (this.config.enableLlm && mediaObject) {
+            const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'exit', mediaObject, journeyContext);
+            if (llmDescription) {
+                return {
+                    description: llmDescription,
+                    involvedLandmarks: landmarks,
+                    confidence: 0.9,
+                    usedLlm: true,
+                };
+            }
+        }
         return {
-            description:
+            description: basicDescription,
             involvedLandmarks: landmarks,
             confidence: 0.8,
             usedLlm: false,
@@ -35801,6 +35826,95 @@ class SpatialReasoningEngine {
             return null;
         }
     }
+    /** Get LLM-enhanced description for entry/exit events */
+    async getLlmEntryExitDescription(tracked, camera, landmarks, eventType, mediaObject, journeyContext) {
+        const llm = await this.findLlmDevice();
+        if (!llm || !llm.getChatCompletion)
+            return null;
+        try {
+            // Convert image to base64 for vision LLM
+            const imageData = await mediaObjectToBase64(mediaObject);
+            const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
+            const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
+            // Build context-aware prompt
+            const prompt = eventType === 'entry'
+                ? `You are a security camera system. Analyze this image and describe who/what just arrived.
+
+CONTEXT:
+- Camera: ${camera.name}
+- Object type: ${tracked.className}
+- Nearby landmarks: ${landmarkNames}
+
+INSTRUCTIONS:
+Look at the image and generate a single, natural sentence describing:
+1. Physical description (if person: gender, clothing, items carried; if vehicle: color, type, make)
+2. What they appear to be doing (arriving, approaching, etc.)
+3. Relevant landmark context (driveway, front door, mailbox, etc.)
+
+Examples of good descriptions:
+- "Man in gray hoodie approaching the front door"
+- "Woman in scrubs arriving with shopping bags"
+- "White delivery van pulling into the driveway"
+- "UPS driver carrying package towards the porch"
+- "Teenager on bicycle coming up the driveway"
+
+Generate ONLY the description, nothing else:`
+                : `You are a security camera system. Analyze this image and describe who/what is leaving.
+
+CONTEXT:
+- Camera: ${camera.name}
+- Object type: ${tracked.className}
+- Time on property: ${dwellTime > 60 ? Math.round(dwellTime / 60) + ' minutes' : dwellTime + ' seconds'}
+- Nearby landmarks: ${landmarkNames}
+${journeyContext ? `- Journey: ${journeyContext}` : ''}
+
+INSTRUCTIONS:
+Look at the image and generate a single, natural sentence describing:
+1. Physical description (if person: gender, clothing, items carried; if vehicle: color, type)
+2. What they did (if determinable from context)
+3. Direction they're leaving towards
+
+Examples of good descriptions:
+- "Man in black hoodie leaving after checking the mailbox"
+- "Woman in business attire heading to car in driveway"
+- "Red sedan backing out of the driveway"
+- "Delivery driver returning to FedEx truck after leaving package"
+- "Landscaper with leaf blower heading to work truck"
+
+Generate ONLY the description, nothing else:`;
+            // Build message content - use multimodal format if we have an image
+            let messageContent;
+            if (imageData) {
+                messageContent = [
+                    { type: 'text', text: prompt },
+                    buildImageContent(imageData, this.llmProviderType),
+                ];
+            }
+            else {
+                messageContent = prompt;
+            }
+            // Call LLM using ChatCompletion interface
+            const result = await llm.getChatCompletion({
+                messages: [
+                    {
+                        role: 'user',
+                        content: messageContent,
+                    },
+                ],
+                max_tokens: 100,
+                temperature: 0.7,
+            });
+            const content = result?.choices?.[0]?.message?.content;
+            if (content && typeof content === 'string') {
+                return content.trim();
+            }
+            return null;
+        }
+        catch (e) {
+            this.console.warn(`LLM ${eventType} description generation failed:`, e);
+            return null;
+        }
+    }
     /** Build LLM prompt with RAG context */
     buildLlmPrompt(tracked, fromCamera, toCamera, transitTime, fromLandmarks, toLandmarks, ragContext) {
         const transitSecs = Math.round(transitTime / 1000);
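The helpers `mediaObjectToBase64` and `buildImageContent` referenced in the new method are defined elsewhere in the bundle and do not appear in this diff. For orientation only, here is a hedged TypeScript sketch of what such helpers commonly look like in a Scrypted plugin: it uses the real `sdk.mediaManager.convertMediaObjectToBuffer` API and assumes an OpenAI-compatible `image_url` content part, which may not match the package's per-provider handling.

// --- Illustrative sketch (not the package's helpers) ---
import sdk, { MediaObject } from '@scrypted/sdk';

// Convert a snapshot MediaObject to a base64 JPEG via Scrypted's media manager.
async function mediaObjectToBase64Sketch(mediaObject: MediaObject): Promise<string> {
    const jpeg = await sdk.mediaManager.convertMediaObjectToBuffer(mediaObject, 'image/jpeg');
    return jpeg.toString('base64');
}

type ChatContentPart =
    | { type: 'text'; text: string }
    | { type: 'image_url'; image_url: { url: string } };

// Mirrors the `if (imageData) { ... } else { messageContent = prompt; }` branch above:
// text-only when no snapshot is available, otherwise text plus an inline data-URL image.
// The image part shape assumes an OpenAI-style provider; other providers differ.
function buildMessageContentSketch(prompt: string, base64Jpeg?: string): string | ChatContentPart[] {
    if (!base64Jpeg)
        return prompt;
    return [
        { type: 'text', text: prompt },
        { type: 'image_url', image_url: { url: `data:image/jpeg;base64,${base64Jpeg}` } },
    ];
}
// --- End sketch ---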
@@ -37106,8 +37220,21 @@ class TrackingEngine {
         // Check if we've already alerted for this object
         if (this.isInAlertCooldown(globalId))
             return;
-        //
-
+        // Get snapshot for LLM description (if LLM is enabled)
+        let mediaObject;
+        if (this.config.useLlmDescriptions) {
+            try {
+                const camera = systemManager.getDeviceById(sighting.cameraId);
+                if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
+                    mediaObject = await camera.takePicture();
+                }
+            }
+            catch (e) {
+                this.console.warn('Failed to get snapshot for entry description:', e);
+            }
+        }
+        // Generate spatial description (now async with LLM support)
+        const spatialResult = await this.spatialReasoning.generateEntryDescription(tracked, sighting.cameraId, mediaObject);
         if (isEntryPoint) {
             // Entry point - generate property entry alert
             await this.alertManager.checkAndAlert('property_entry', tracked, {
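The entry hunk above and the exit hunk below add the same snapshot guard. Condensed into a standalone TypeScript sketch using the Scrypted SDK calls visible in the compiled output (`systemManager.getDeviceById`, `ScryptedInterface.Camera`, `takePicture`); the `useLlmDescriptions` flag name is taken from the diff, while the function name below is illustrative:

// --- Illustrative sketch (not the package's code) ---
import sdk, { Camera, MediaObject, ScryptedDevice, ScryptedInterface } from '@scrypted/sdk';

// Take a snapshot only when LLM descriptions are enabled and the device actually
// implements the Camera interface; never let a snapshot failure block the alert.
async function tryGetSnapshot(cameraId: string, useLlmDescriptions: boolean): Promise<MediaObject | undefined> {
    if (!useLlmDescriptions)
        return undefined;
    try {
        const device = sdk.systemManager.getDeviceById(cameraId) as ScryptedDevice & Camera;
        if (device?.interfaces?.includes(ScryptedInterface.Camera))
            return await device.takePicture();
    }
    catch (e) {
        console.warn('Failed to get snapshot for description:', e);
    }
    return undefined;
}
// --- End sketch ---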
@@ -37182,8 +37309,21 @@ class TrackingEngine {
         const current = this.state.getObject(tracked.globalId);
         if (current && current.state === 'pending') {
             this.state.markExited(tracked.globalId, sighting.cameraId, sighting.cameraName);
-            //
-
+            // Get snapshot for LLM description (if LLM is enabled)
+            let mediaObject;
+            if (this.config.useLlmDescriptions) {
+                try {
+                    const camera = systemManager.getDeviceById(sighting.cameraId);
+                    if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
+                        mediaObject = await camera.takePicture();
+                    }
+                }
+                catch (e) {
+                    this.console.warn('Failed to get snapshot for exit description:', e);
+                }
+            }
+            // Generate rich exit description using topology context (now async with LLM support)
+            const spatialResult = await this.spatialReasoning.generateExitDescription(current, sighting.cameraId, mediaObject);
             this.console.log(`Object ${tracked.globalId.slice(0, 8)} exited: ${spatialResult.description}`);
             await this.alertManager.checkAndAlert('property_exit', current, {
                 cameraId: sighting.cameraId,