@blueharford/scrypted-spatial-awareness 0.6.9 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@blueharford/scrypted-spatial-awareness",
-  "version": "0.6.9",
+  "version": "0.6.10",
   "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
   "author": "Joshua Seidel <blueharford>",
   "license": "Apache-2.0",
@@ -1077,33 +1077,58 @@ Examples of good descriptions:
 
 Generate ONLY the description, nothing else:`;
 
-        // Build message content - use multimodal format if we have an image
-        let messageContent: any;
+        // Try multimodal format first, fall back to text-only if it fails
+        let result: any;
+        let usedVision = false;
+
         if (imageData) {
-            messageContent = [
-                { type: 'text', text: prompt },
-                buildImageContent(imageData, this.llmProviderType),
-            ];
-        } else {
-            messageContent = prompt;
+            // First attempt: Try multimodal with image
+            try {
+                this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
+                const multimodalContent = [
+                    { type: 'text', text: prompt },
+                    buildImageContent(imageData, this.llmProviderType),
+                ];
+
+                result = await llm.getChatCompletion({
+                    messages: [
+                        {
+                            role: 'user',
+                            content: multimodalContent,
+                        },
+                    ],
+                    max_tokens: 100,
+                    temperature: 0.7,
+                });
+                usedVision = true;
+            } catch (visionError: any) {
+                // If vision format fails, try text-only
+                if (isVisionFormatError(visionError)) {
+                    this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
+                } else {
+                    this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
+                }
+            }
         }
 
-        // Call LLM using ChatCompletion interface
-        this.console.log(`[LLM] Calling getChatCompletion for ${eventType}...`);
-        const result = await llm.getChatCompletion({
-            messages: [
-                {
-                    role: 'user',
-                    content: messageContent,
-                },
-            ],
-            max_tokens: 100,
-            temperature: 0.7,
-        });
+        // If no result yet, try text-only
+        if (!result) {
+            this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
+            result = await llm.getChatCompletion({
+                messages: [
+                    {
+                        role: 'user',
+                        content: prompt,
+                    },
+                ],
+                max_tokens: 100,
+                temperature: 0.7,
+            });
+        }
 
         const content = result?.choices?.[0]?.message?.content;
         if (content && typeof content === 'string') {
-            this.console.log(`[LLM] Got ${eventType} description: ${content.trim().substring(0, 50)}...`);
+            this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
             return content.trim();
         }
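
Note: the hunk calls two helpers, isVisionFormatError and buildImageContent, that are defined elsewhere in the plugin and not shown in this diff. A minimal sketch of what they might look like follows; the error-message heuristics and the content-part shapes (OpenAI-style and Anthropic-style image parts) are assumptions for illustration, not code from the package.

// Hypothetical sketch only; the package's real helpers may differ.
// Heuristic: treat errors that mention image/vision input as "the provider
// rejected the multimodal message format" rather than a transient failure.
function isVisionFormatError(error: any): boolean {
    const message = String(error?.message ?? error).toLowerCase();
    return message.includes('image')
        || message.includes('vision')
        || message.includes('multimodal');
}

// Build a provider-specific image content part from base64 image data.
// The 'anthropic' provider key and the base64 input type are assumptions.
function buildImageContent(imageData: string, providerType: string): any {
    if (providerType === 'anthropic') {
        // Anthropic-style content block
        return {
            type: 'image',
            source: { type: 'base64', media_type: 'image/jpeg', data: imageData },
        };
    }
    // OpenAI-style content part using a data URL
    return {
        type: 'image_url',
        image_url: { url: `data:image/jpeg;base64,${imageData}` },
    };
}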