@blueharford/scrypted-spatial-awareness 0.6.9 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.zip CHANGED
Binary file
@@ -35905,32 +35905,56 @@ Examples of good descriptions:
  - "Landscaper with leaf blower heading to work truck"
 
  Generate ONLY the description, nothing else:`;
- // Build message content - use multimodal format if we have an image
- let messageContent;
+ // Try multimodal format first, fall back to text-only if it fails
+ let result;
+ let usedVision = false;
  if (imageData) {
-     messageContent = [
-         { type: 'text', text: prompt },
-         buildImageContent(imageData, this.llmProviderType),
-     ];
+     // First attempt: Try multimodal with image
+     try {
+         this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
+         const multimodalContent = [
+             { type: 'text', text: prompt },
+             buildImageContent(imageData, this.llmProviderType),
+         ];
+         result = await llm.getChatCompletion({
+             messages: [
+                 {
+                     role: 'user',
+                     content: multimodalContent,
+                 },
+             ],
+             max_tokens: 100,
+             temperature: 0.7,
+         });
+         usedVision = true;
+     }
+     catch (visionError) {
+         // If vision format fails, try text-only
+         if (isVisionFormatError(visionError)) {
+             this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
+         }
+         else {
+             this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
+         }
+     }
  }
- else {
-     messageContent = prompt;
+ // If no result yet, try text-only
+ if (!result) {
+     this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
+     result = await llm.getChatCompletion({
+         messages: [
+             {
+                 role: 'user',
+                 content: prompt,
+             },
+         ],
+         max_tokens: 100,
+         temperature: 0.7,
+     });
  }
- // Call LLM using ChatCompletion interface
- this.console.log(`[LLM] Calling getChatCompletion for ${eventType}...`);
- const result = await llm.getChatCompletion({
-     messages: [
-         {
-             role: 'user',
-             content: messageContent,
-         },
-     ],
-     max_tokens: 100,
-     temperature: 0.7,
- });
  const content = result?.choices?.[0]?.message?.content;
  if (content && typeof content === 'string') {
-     this.console.log(`[LLM] Got ${eventType} description: ${content.trim().substring(0, 50)}...`);
+     this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
      return content.trim();
  }
  this.console.warn(`[LLM] No content in response for ${eventType}`);
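The multimodal attempt builds its message around buildImageContent(imageData, this.llmProviderType), which is called but not defined anywhere in this hunk. A minimal sketch of what such a helper might look like, assuming imageData is a base64-encoded JPEG and that the provider type distinguishes an Anthropic-style image block from an OpenAI-compatible image_url part; the 'anthropic' check and media type are assumptions, since only the call site appears in this diff:

// Hypothetical helper: the real buildImageContent is not shown in this hunk.
// Assumes imageData is a base64-encoded JPEG string.
function buildImageContent(imageData, providerType) {
    if (providerType === 'anthropic') {
        // Anthropic messages API image block
        return {
            type: 'image',
            source: { type: 'base64', media_type: 'image/jpeg', data: imageData },
        };
    }
    // OpenAI-compatible chat completions image part
    return {
        type: 'image_url',
        image_url: { url: `data:image/jpeg;base64,${imageData}` },
    };
}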
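The fallback branch likewise hinges on isVisionFormatError, which the catch block calls but this hunk never defines. A plausible sketch, assuming the plugin classifies vision failures by matching recognizable substrings in the provider's error message; the matched strings here are illustrative guesses, not the plugin's actual predicate:

// Hypothetical guard: the real isVisionFormatError is not shown in this hunk.
// Returns true when an error message suggests the provider rejected
// structured multimodal content rather than failing for another reason.
function isVisionFormatError(error) {
    const message = String(error?.message || error).toLowerCase();
    return (
        message.includes('invalid content type') ||
        message.includes('must be a string') ||
        message.includes('does not support image') ||
        message.includes('vision')
    );
}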