@blueharford/scrypted-spatial-awareness 0.6.8 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.zip CHANGED
Binary file
@@ -35531,9 +35531,12 @@ class SpatialReasoningEngine {
35531
35531
  }
35532
35532
  const basicDescription = `${objectType} arrived at ${location}${source}`;
35533
35533
  // Try LLM for enhanced description with visual details
35534
+ this.console.log(`[Entry] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
35534
35535
  if (this.config.enableLlm && mediaObject) {
35536
+ this.console.log(`[Entry] Attempting LLM description for entry event`);
35535
35537
  const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'entry', mediaObject);
35536
35538
  if (llmDescription) {
35539
+ this.console.log(`[Entry] LLM returned: ${llmDescription.substring(0, 50)}...`);
35537
35540
  return {
35538
35541
  description: llmDescription,
35539
35542
  involvedLandmarks: landmarks,
@@ -35541,6 +35544,10 @@ class SpatialReasoningEngine {
35541
35544
  usedLlm: true,
35542
35545
  };
35543
35546
  }
35547
+ this.console.warn(`[Entry] LLM returned null, falling back to basic`);
35548
+ }
35549
+ else {
35550
+ this.console.log(`[Entry] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
35544
35551
  }
35545
35552
  return {
35546
35553
  description: basicDescription,
@@ -35617,9 +35624,12 @@ class SpatialReasoningEngine {
35617
35624
  }
35618
35625
  const basicDescription = `${objectType} left ${location}${destination}${timeContext}${journeyContext}`;
35619
35626
  // Try LLM for enhanced description with visual details
35627
+ this.console.log(`[Exit] enableLlm=${this.config.enableLlm}, hasMediaObject=${!!mediaObject}`);
35620
35628
  if (this.config.enableLlm && mediaObject) {
35629
+ this.console.log(`[Exit] Attempting LLM description for exit event`);
35621
35630
  const llmDescription = await this.getLlmEntryExitDescription(tracked, camera, landmarks, 'exit', mediaObject, journeyContext);
35622
35631
  if (llmDescription) {
35632
+ this.console.log(`[Exit] LLM returned: ${llmDescription.substring(0, 50)}...`);
35623
35633
  return {
35624
35634
  description: llmDescription,
35625
35635
  involvedLandmarks: landmarks,
@@ -35627,6 +35637,10 @@ class SpatialReasoningEngine {
35627
35637
  usedLlm: true,
35628
35638
  };
35629
35639
  }
35640
+ this.console.warn(`[Exit] LLM returned null, falling back to basic`);
35641
+ }
35642
+ else {
35643
+ this.console.log(`[Exit] Skipping LLM (enableLlm=${this.config.enableLlm}, mediaObject=${!!mediaObject})`);
35630
35644
  }
35631
35645
  return {
35632
35646
  description: basicDescription,
@@ -35828,12 +35842,21 @@ class SpatialReasoningEngine {
35828
35842
  }
35829
35843
  /** Get LLM-enhanced description for entry/exit events */
35830
35844
  async getLlmEntryExitDescription(tracked, camera, landmarks, eventType, mediaObject, journeyContext) {
35845
+ this.console.log(`[LLM] getLlmEntryExitDescription called for ${eventType} event`);
35831
35846
  const llm = await this.findLlmDevice();
35832
- if (!llm || !llm.getChatCompletion)
35847
+ if (!llm) {
35848
+ this.console.warn(`[LLM] No LLM device found for ${eventType} description`);
35833
35849
  return null;
35850
+ }
35851
+ if (!llm.getChatCompletion) {
35852
+ this.console.warn(`[LLM] LLM device has no getChatCompletion method`);
35853
+ return null;
35854
+ }
35855
+ this.console.log(`[LLM] Using LLM device: ${this.llmProvider}`);
35834
35856
  try {
35835
35857
  // Convert image to base64 for vision LLM
35836
35858
  const imageData = await mediaObjectToBase64(mediaObject);
35859
+ this.console.log(`[LLM] Image converted: ${imageData ? 'success' : 'failed'}, type: ${imageData?.mediaType}`);
35837
35860
  const landmarkNames = landmarks.map(l => l.name).join(', ') || 'none identified';
35838
35861
  const dwellTime = Math.round((tracked.lastSeen - tracked.firstSeen) / 1000);
35839
35862
  // Build context-aware prompt
@@ -35882,36 +35905,63 @@ Examples of good descriptions:
35882
35905
  - "Landscaper with leaf blower heading to work truck"
35883
35906
 
35884
35907
  Generate ONLY the description, nothing else:`;
35885
- // Build message content - use multimodal format if we have an image
35886
- let messageContent;
35908
+ // Try multimodal format first, fall back to text-only if it fails
35909
+ let result;
35910
+ let usedVision = false;
35887
35911
  if (imageData) {
35888
- messageContent = [
35889
- { type: 'text', text: prompt },
35890
- buildImageContent(imageData, this.llmProviderType),
35891
- ];
35912
+ // First attempt: Try multimodal with image
35913
+ try {
35914
+ this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
35915
+ const multimodalContent = [
35916
+ { type: 'text', text: prompt },
35917
+ buildImageContent(imageData, this.llmProviderType),
35918
+ ];
35919
+ result = await llm.getChatCompletion({
35920
+ messages: [
35921
+ {
35922
+ role: 'user',
35923
+ content: multimodalContent,
35924
+ },
35925
+ ],
35926
+ max_tokens: 100,
35927
+ temperature: 0.7,
35928
+ });
35929
+ usedVision = true;
35930
+ }
35931
+ catch (visionError) {
35932
+ // If vision format fails, try text-only
35933
+ if (isVisionFormatError(visionError)) {
35934
+ this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
35935
+ }
35936
+ else {
35937
+ this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
35938
+ }
35939
+ }
35892
35940
  }
35893
- else {
35894
- messageContent = prompt;
35941
+ // If no result yet, try text-only
35942
+ if (!result) {
35943
+ this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
35944
+ result = await llm.getChatCompletion({
35945
+ messages: [
35946
+ {
35947
+ role: 'user',
35948
+ content: prompt,
35949
+ },
35950
+ ],
35951
+ max_tokens: 100,
35952
+ temperature: 0.7,
35953
+ });
35895
35954
  }
35896
- // Call LLM using ChatCompletion interface
35897
- const result = await llm.getChatCompletion({
35898
- messages: [
35899
- {
35900
- role: 'user',
35901
- content: messageContent,
35902
- },
35903
- ],
35904
- max_tokens: 100,
35905
- temperature: 0.7,
35906
- });
35907
35955
  const content = result?.choices?.[0]?.message?.content;
35908
35956
  if (content && typeof content === 'string') {
35957
+ this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
35909
35958
  return content.trim();
35910
35959
  }
35960
+ this.console.warn(`[LLM] No content in response for ${eventType}`);
35911
35961
  return null;
35912
35962
  }
35913
35963
  catch (e) {
35914
- this.console.warn(`LLM ${eventType} description generation failed:`, e);
35964
+ this.console.warn(`[LLM] ${eventType} description generation failed:`, e);
35915
35965
  return null;
35916
35966
  }
35917
35967
  }
@@ -37222,19 +37272,24 @@ class TrackingEngine {
37222
37272
  return;
37223
37273
  // Get snapshot for LLM description (if LLM is enabled)
37224
37274
  let mediaObject;
37275
+ this.console.log(`[Entry Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
37225
37276
  if (this.config.useLlmDescriptions) {
37226
37277
  try {
37227
37278
  const camera = systemManager.getDeviceById(sighting.cameraId);
37279
+ this.console.log(`[Entry Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)}`);
37228
37280
  if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
37229
37281
  mediaObject = await camera.takePicture();
37282
+ this.console.log(`[Entry Alert] Got snapshot: ${!!mediaObject}`);
37230
37283
  }
37231
37284
  }
37232
37285
  catch (e) {
37233
- this.console.warn('Failed to get snapshot for entry description:', e);
37286
+ this.console.warn('[Entry Alert] Failed to get snapshot:', e);
37234
37287
  }
37235
37288
  }
37236
37289
  // Generate spatial description (now async with LLM support)
37290
+ this.console.log(`[Entry Alert] Calling generateEntryDescription with mediaObject=${!!mediaObject}`);
37237
37291
  const spatialResult = await this.spatialReasoning.generateEntryDescription(tracked, sighting.cameraId, mediaObject);
37292
+ this.console.log(`[Entry Alert] Got description: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
37238
37293
  if (isEntryPoint) {
37239
37294
  // Entry point - generate property entry alert
37240
37295
  await this.alertManager.checkAndAlert('property_entry', tracked, {
@@ -37311,20 +37366,24 @@ class TrackingEngine {
37311
37366
  this.state.markExited(tracked.globalId, sighting.cameraId, sighting.cameraName);
37312
37367
  // Get snapshot for LLM description (if LLM is enabled)
37313
37368
  let mediaObject;
37369
+ this.console.log(`[Exit Alert] useLlmDescriptions=${this.config.useLlmDescriptions}`);
37314
37370
  if (this.config.useLlmDescriptions) {
37315
37371
  try {
37316
37372
  const camera = systemManager.getDeviceById(sighting.cameraId);
37373
+ this.console.log(`[Exit Alert] Camera ${sighting.cameraId} has Camera interface: ${camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)}`);
37317
37374
  if (camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
37318
37375
  mediaObject = await camera.takePicture();
37376
+ this.console.log(`[Exit Alert] Got snapshot: ${!!mediaObject}`);
37319
37377
  }
37320
37378
  }
37321
37379
  catch (e) {
37322
- this.console.warn('Failed to get snapshot for exit description:', e);
37380
+ this.console.warn('[Exit Alert] Failed to get snapshot:', e);
37323
37381
  }
37324
37382
  }
37325
37383
  // Generate rich exit description using topology context (now async with LLM support)
37384
+ this.console.log(`[Exit Alert] Calling generateExitDescription with mediaObject=${!!mediaObject}`);
37326
37385
  const spatialResult = await this.spatialReasoning.generateExitDescription(current, sighting.cameraId, mediaObject);
37327
- this.console.log(`Object ${tracked.globalId.slice(0, 8)} exited: ${spatialResult.description}`);
37386
+ this.console.log(`[Exit Alert] Object ${tracked.globalId.slice(0, 8)} exited: "${spatialResult.description.substring(0, 60)}...", usedLlm=${spatialResult.usedLlm}`);
37328
37387
  await this.alertManager.checkAndAlert('property_exit', current, {
37329
37388
  cameraId: sighting.cameraId,
37330
37389
  cameraName: sighting.cameraName,