@blueharford/scrypted-spatial-awareness 0.6.9 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +45 -21
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +46 -21
package/out/plugin.zip
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
(hunk not captured in this extract)
|
package/src/core/spatial-reasoning.ts
CHANGED
|
@@ -1077,33 +1077,58 @@ Examples of good descriptions:
|
|
|
1077
1077
|
|
|
1078
1078
|
Generate ONLY the description, nothing else:`;
|
|
1079
1079
|
|
|
1080
|
-
//
|
|
1081
|
-
let
|
|
1080
|
+
// Try multimodal format first, fall back to text-only if it fails
|
|
1081
|
+
let result: any;
|
|
1082
|
+
let usedVision = false;
|
|
1083
|
+
|
|
1082
1084
|
if (imageData) {
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1085
|
+
// First attempt: Try multimodal with image
|
|
1086
|
+
try {
|
|
1087
|
+
this.console.log(`[LLM] Attempting multimodal ${eventType} call with image...`);
|
|
1088
|
+
const multimodalContent = [
|
|
1089
|
+
{ type: 'text', text: prompt },
|
|
1090
|
+
buildImageContent(imageData, this.llmProviderType),
|
|
1091
|
+
];
|
|
1092
|
+
|
|
1093
|
+
result = await llm.getChatCompletion({
|
|
1094
|
+
messages: [
|
|
1095
|
+
{
|
|
1096
|
+
role: 'user',
|
|
1097
|
+
content: multimodalContent,
|
|
1098
|
+
},
|
|
1099
|
+
],
|
|
1100
|
+
max_tokens: 100,
|
|
1101
|
+
temperature: 0.7,
|
|
1102
|
+
});
|
|
1103
|
+
usedVision = true;
|
|
1104
|
+
} catch (visionError: any) {
|
|
1105
|
+
// If vision format fails, try text-only
|
|
1106
|
+
if (isVisionFormatError(visionError)) {
|
|
1107
|
+
this.console.warn(`[LLM] Vision format not supported, falling back to text-only: ${visionError.message || visionError}`);
|
|
1108
|
+
} else {
|
|
1109
|
+
this.console.warn(`[LLM] Multimodal call failed, trying text-only: ${visionError.message || visionError}`);
|
|
1110
|
+
}
|
|
1111
|
+
}
|
|
1089
1112
|
}
|
|
1090
1113
|
|
|
1091
|
-
//
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1114
|
+
// If no result yet, try text-only
|
|
1115
|
+
if (!result) {
|
|
1116
|
+
this.console.log(`[LLM] Calling text-only getChatCompletion for ${eventType}...`);
|
|
1117
|
+
result = await llm.getChatCompletion({
|
|
1118
|
+
messages: [
|
|
1119
|
+
{
|
|
1120
|
+
role: 'user',
|
|
1121
|
+
content: prompt,
|
|
1122
|
+
},
|
|
1123
|
+
],
|
|
1124
|
+
max_tokens: 100,
|
|
1125
|
+
temperature: 0.7,
|
|
1126
|
+
});
|
|
1127
|
+
}
|
|
1103
1128
|
|
|
1104
1129
|
const content = result?.choices?.[0]?.message?.content;
|
|
1105
1130
|
if (content && typeof content === 'string') {
|
|
1106
|
-
this.console.log(`[LLM] Got ${eventType} description: ${content.trim().substring(0, 50)}...`);
|
|
1131
|
+
this.console.log(`[LLM] Got ${eventType} description (vision=${usedVision}): ${content.trim().substring(0, 50)}...`);
|
|
1107
1132
|
return content.trim();
|
|
1108
1133
|
}
|
|
1109
1134
|
|