@blueharford/scrypted-spatial-awareness 0.6.8 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +83 -24
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +73 -21
- package/src/core/tracking-engine.ts +12 -3
package/dist/plugin.zip
CHANGED
|
Binary file
|
package/out/main.nodejs.js
CHANGED
|
@@ -35531,9 +35531,12 @@ class SpatialReasoningEngine {
|
|
|
35531
35531
|
}
|
|
35532
35532
|
const basicDescription = `${objectType} arrived at ${location}${source}`;
|
|
35533
35533
|
// Try LLM for enhanced description with visual details
|
|
35534
|
+
this.console.log(`[Entry] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
|
|
35534
35535
|
if (this.config.enableLlm && mediaObject) {
|
|
35536
|
+
this.console.log(`[Entry] Attempting LLM description for entry event`);
|
|
35535
35537
|
const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'entry', mediaObject);
|
|
35536
35538
|
if (llmDescription) {
|
|
35539
|
+
this.console.log(`[Entry] LLM returned: ${llmDescription.substring(0, 50)}...`);
|
|
35537
35540
|
return {
|
|
35538
35541
|
description: llmDescription,
|
|
35539
35542
|
involvedLandmarks: landmarks,
|
|
@@ -35541,6 +35544,10 @@ class SpatialReasoningEngine {
|
|
|
35541
35544
|
usedLlm: true,
|
|
35542
35545
|
};
|
|
35543
35546
|
}
|
|
35547
|
+
this.console.warn(`[Entry] LLM returned null, falling back to basic`);
|
|
35548
|
+
}
|
|
35549
|
+
else {
|
|
35550
|
+
this.console.log(`[Entry] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
|
|
35544
35551
|
}
|
|
35545
35552
|
return {
|
|
35546
35553
|
description: basicDescription,
|
|
@@ -35617,9 +35624,12 @@ class SpatialReasoningEngine {
|
|
|
35617
35624
|
}
|
|
35618
35625
|
const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
|
|
35619
35626
|
// Try LLM for enhanced description with visual details
|
|
35627
|
+
this.console.log(`[Exit] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
|
|
35620
35628
|
if (this.config.enableLlm && mediaObject) {
|
|
35629
|
+
this.console.log(`[Exit] Attempting LLM description for exit event`);
|
|
35621
35630
|
const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'exit', mediaObject, journeyContext);
|
|
35622
35631
|
if (llmDescription) {
|
|
35632
|
+
this.console.log(`[Exit] LLM returned: ${llmDescription.substring(0, 50)}...`);
|
|
35623
35633
|
return {
|
|
35624
35634
|
description: llmDescription,
|
|
35625
35635
|
involvedLandmarks: landmarks,
|
|
@@ -35627,6 +35637,10 @@ class SpatialReasoningEngine {
|
|
|
35627
35637
|
usedLlm: true,
|
|
35628
35638
|
};
|
|
35629
35639
|
}
|
|
35640
|
+
this.console.warn(`[Exit] LLM returned null, falling back to basic`);
|
|
35641
|
+
}
|
|
35642
|
+
else {
|
|
35643
|
+
this.console.log(`[Exit] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
|
|
35630
35644
|
}
|
|
35631
35645
|
return {
|
|
35632
35646
|
description: basicDescription,
|
|
@@ -35828,12 +35842,21 @@ class SpatialReasoningEngine {
|
|
|
35828
35842
|
}
|
|
35829
35843
|
/** Get LLM-enhanced description for entry/exit events */
|
|
35830
35844
|
async getLlmEntryExitDescription(tracked, camera, landmarks, eventType, mediaObject, journeyContext) {
|
|
35845
|
+
this.console.log(`[LLM] getLlmEntryExitDescription called for ${eventType} event`);
|
|
35831
35846
|
const llm = await this.findLlmDevice();
|
|
35832
|
-
if (!llm
|
|
35847
|
+
if (!llm) {
|
|
35848
|
+
this.console.warn(`[LLM] No LLM device found for ${eventType} description`);
|
|
35833
35849
|
return null;
|
|
35850
|
+
}
|
|
35851
|
+
if (!llm.getChatCompletion) {
|
|
35852
|
+
this.console.warn(`[LLM] LLM device has no getChatCompletion method`);
|
|
35853
|
+
return null;
|
|
35854
|
+
}
|
|
35855
|
+
this.console.log(`[LLM] Using LLM device: ${this.llmProvider}`);
|
|
35834
35856
|
try {
|
|
35835
35857
|
// Convert image to base64 for vision LLM
|
|
35836
35858
|
const imageData = await mediaObjectToBase64(mediaObject);
|
|
35859
|
+
this.console.log(`[LLM] Image converted: ${imageData ? 'success' : 'failed'}, type: ${imageData?.mediaType}`);
|
|
35837
35860
|
const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
|
|
35838
35861
|
const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
|
|
35839
35862
|
// Build context-aware prompt
|
|
@@ -35882,36 +35905,63 @@ Examples of good descriptions:
|
|
|
35882
35905
|
- "Landscaper with leaf blower heading to work truck"
|
|
35883
35906
|
|
|
35884
35907
|
Generate ONLY the description, nothing else:`;
|
|
35885
|
-
//
|
|
35886
|
-
let
|
|
35908
|
+
// Try multimodal format first, fall back to text-only if it fails
|
|
35909
|
+
let result;
|
|
35910
|
+
let usedVision = false;
|
|
35887
35911
|
if (imageData) {
|
|
35888
|
-
|
|
35889
|
-
|
|
35890
|
-
|
|
35891
|
-
|
|
35912
|
+
// First attempt: Try multimodal with image
|
|
35913
|
+
try {
|
|
35914
|
+
this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
|
|
35915
|
+
const multimodalContent = [
|
|
35916
|
+
{ type: 'text', text: prompt },
|
|
35917
|
+
buildImageContent(imageData, this.llmProviderType),
|
|
35918
|
+
];
|
|
35919
|
+
result = await llm.getChatCompletion({
|
|
35920
|
+
messages: [
|
|
35921
|
+
{
|
|
35922
|
+
role: 'user',
|
|
35923
|
+
content: multimodalContent,
|
|
35924
|
+
},
|
|
35925
|
+
],
|
|
35926
|
+
max_tokens: 100,
|
|
35927
|
+
temperature: 0.7,
|
|
35928
|
+
});
|
|
35929
|
+
usedVision = true;
|
|
35930
|
+
}
|
|
35931
|
+
catch (visionError) {
|
|
35932
|
+
// If vision format fails, try text-only
|
|
35933
|
+
if (isVisionFormatError(visionError)) {
|
|
35934
|
+
this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
|
|
35935
|
+
}
|
|
35936
|
+
else {
|
|
35937
|
+
this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
|
|
35938
|
+
}
|
|
35939
|
+
}
|
|
35892
35940
|
}
|
|
35893
|
-
|
|
35894
|
-
|
|
35941
|
+
// If no result yet, try text-only
|
|
35942
|
+
if (!result) {
|
|
35943
|
+
this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
|
|
35944
|
+
result = await llm.getChatCompletion({
|
|
35945
|
+
messages: [
|
|
35946
|
+
{
|
|
35947
|
+
role: 'user',
|
|
35948
|
+
content: prompt,
|
|
35949
|
+
},
|
|
35950
|
+
],
|
|
35951
|
+
max_tokens: 100,
|
|
35952
|
+
temperature: 0.7,
|
|
35953
|
+
});
|
|
35895
35954
|
}
|
|
35896
|
-
// Call LLM using ChatCompletion interface
|
|
35897
|
-
const result = await llm.getChatCompletion({
|
|
35898
|
-
messages: [
|
|
35899
|
-
{
|
|
35900
|
-
role: 'user',
|
|
35901
|
-
content: messageContent,
|
|
35902
|
-
},
|
|
35903
|
-
],
|
|
35904
|
-
max_tokens: 100,
|
|
35905
|
-
temperature: 0.7,
|
|
35906
|
-
});
|
|
35907
35955
|
const content = result?.choices?.[0]?.message?.content;
|
|
35908
35956
|
if (content && typeof content === 'string') {
|
|
35957
|
+
this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
|
|
35909
35958
|
return content.trim();
|
|
35910
35959
|
}
|
|
35960
|
+
this.console.warn(`[LLM] No content in response for ${eventType}`);
|
|
35911
35961
|
return null;
|
|
35912
35962
|
}
|
|
35913
35963
|
catch (e) {
|
|
35914
|
-
this.console.warn(`LLM ${eventType} description generation failed:`, e);
|
|
35964
|
+
this.console.warn(`[LLM] ${eventType} description generation failed:`, e);
|
|
35915
35965
|
return null;
|
|
35916
35966
|
}
|
|
35917
35967
|
}
|
|
@@ -37222,19 +37272,24 @@ class TrackingEngine {
|
|
|
37222
37272
|
return;
|
|
37223
37273
|
// Get snapshot for LLM description (if LLM is enabled)
|
|
37224
37274
|
let mediaObject;
|
|
37275
|
+
this.console.log(`[Entry Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
|
|
37225
37276
|
if (this.config.useLlmDescriptions) {
|
|
37226
37277
|
try {
|
|
37227
37278
|
const camera = systemManager.getDeviceById(sighting.cameraId);
|
|
37279
|
+
this.console.log(`[Entry Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)}`);
|
|
37228
37280
|
if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
|
|
37229
37281
|
mediaObject = await camera.takePicture();
|
|
37282
|
+
this.console.log(`[Entry Alert] Got snapshot: ${!!mediaObject}`);
|
|
37230
37283
|
}
|
|
37231
37284
|
}
|
|
37232
37285
|
catch (e) {
|
|
37233
|
-
this.console.warn('Failed to get snapshot
|
|
37286
|
+
this.console.warn('[Entry Alert] Failed to get snapshot:', e);
|
|
37234
37287
|
}
|
|
37235
37288
|
}
|
|
37236
37289
|
// Generate spatial description (now async with LLM support)
|
|
37290
|
+
this.console.log(`[Entry Alert] Calling generateEntryDescription with mediaObject=${!!mediaObject}`);
|
|
37237
37291
|
const spatialResult = await this.spatialReasoning.generateEntryDescription(tracked, sighting.cameraId, mediaObject);
|
|
37292
|
+
this.console.log(`[Entry Alert] Got description: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
|
|
37238
37293
|
if (isEntryPoint) {
|
|
37239
37294
|
// Entry point - generate property entry alert
|
|
37240
37295
|
await this.alertManager.checkAndAlert('property_entry', tracked, {
|
|
@@ -37311,20 +37366,24 @@ class TrackingEngine {
|
|
|
37311
37366
|
this.state.markExited(tracked.globalId, sighting.cameraId, sighting.cameraName);
|
|
37312
37367
|
// Get snapshot for LLM description (if LLM is enabled)
|
|
37313
37368
|
let mediaObject;
|
|
37369
|
+
this.console.log(`[Exit Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
|
|
37314
37370
|
if (this.config.useLlmDescriptions) {
|
|
37315
37371
|
try {
|
|
37316
37372
|
const camera = systemManager.getDeviceById(sighting.cameraId);
|
|
37373
|
+
this.console.log(`[Exit Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)}`);
|
|
37317
37374
|
if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
|
|
37318
37375
|
mediaObject = await camera.takePicture();
|
|
37376
|
+
this.console.log(`[Exit Alert] Got snapshot: ${!!mediaObject}`);
|
|
37319
37377
|
}
|
|
37320
37378
|
}
|
|
37321
37379
|
catch (e) {
|
|
37322
|
-
this.console.warn('Failed to get snapshot
|
|
37380
|
+
this.console.warn('[Exit Alert] Failed to get snapshot:', e);
|
|
37323
37381
|
}
|
|
37324
37382
|
}
|
|
37325
37383
|
// Generate rich exit description using topology context (now async with LLM support)
|
|
37384
|
+
this.console.log(`[Exit Alert] Calling generateExitDescription with mediaObject=${!!mediaObject}`);
|
|
37326
37385
|
const spatialResult = await this.spatialReasoning.generateExitDescription(current, sighting.cameraId, mediaObject);
|
|
37327
|
-
this.console.log(`Object ${tracked.globalId.slice(0, 8)} exited: ${spatialResult.description}`);
|
|
37386
|
+
this.console.log(`[Exit Alert] Object ${tracked.globalId.slice(0, 8)} exited: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
|
|
37328
37387
|
await this.alertManager.checkAndAlert('property_exit', current, {
|
|
37329
37388
|
cameraId: sighting.cameraId,
|
|
37330
37389
|
cameraName: sighting.cameraName,
|