@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +52 -22
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +37 -11
- package/src/core/topology-discovery.ts +18 -12
package/out/plugin.zip
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -87,12 +87,25 @@ export async function mediaObjectToBase64(mediaObject: MediaObject): Promise<Ima
|
|
|
87
87
|
// Convert MediaObject to Buffer using mediaManager
|
|
88
88
|
const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, ScryptedMimeTypes.Image);
|
|
89
89
|
|
|
90
|
+
if (!buffer || buffer.length === 0) {
|
|
91
|
+
console.warn('Failed to convert MediaObject: empty buffer');
|
|
92
|
+
return null;
|
|
93
|
+
}
|
|
94
|
+
|
|
90
95
|
// Convert buffer to base64 (raw, no data URL prefix)
|
|
91
96
|
const base64 = buffer.toString('base64');
|
|
92
97
|
|
|
98
|
+
// Validate base64 - check it's not empty and looks valid
|
|
99
|
+
if (!base64 || base64.length < 100) {
|
|
100
|
+
console.warn(`Invalid base64: length=${base64?.length || 0}`);
|
|
101
|
+
return null;
|
|
102
|
+
}
|
|
103
|
+
|
|
93
104
|
// Determine MIME type - default to JPEG for camera images
|
|
94
105
|
const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
|
|
95
106
|
|
|
107
|
+
console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
|
|
108
|
+
|
|
96
109
|
return { base64, mediaType };
|
|
97
110
|
} catch (e) {
|
|
98
111
|
console.warn('Failed to convert MediaObject to base64:', e);
|
|
@@ -101,18 +114,17 @@ export async function mediaObjectToBase64(mediaObject: MediaObject): Promise<Ima
|
|
|
101
114
|
}
|
|
102
115
|
|
|
103
116
|
/** LLM Provider type for image format selection */
|
|
104
|
-
export type LlmProvider = 'openai' | 'anthropic' | 'unknown';
|
|
117
|
+
export type LlmProvider = 'openai' | 'anthropic' | 'scrypted' | 'unknown';
|
|
105
118
|
|
|
106
119
|
/**
|
|
107
120
|
* Build image content block for ChatCompletion API
|
|
108
|
-
* Supports
|
|
121
|
+
* Supports OpenAI, Anthropic, and @scrypted/llm formats
|
|
109
122
|
* @param imageData - Image data with base64 and media type
|
|
110
|
-
* @param provider - The LLM provider type
|
|
123
|
+
* @param provider - The LLM provider type
|
|
111
124
|
*/
|
|
112
125
|
export function buildImageContent(imageData: ImageData, provider: LlmProvider = 'unknown'): any {
|
|
113
126
|
if (provider === 'openai') {
|
|
114
127
|
// OpenAI format: uses data URL with image_url wrapper
|
|
115
|
-
// Include detail parameter for compatibility
|
|
116
128
|
return {
|
|
117
129
|
type: 'image_url',
|
|
118
130
|
image_url: {
|
|
@@ -121,7 +133,7 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
|
|
|
121
133
|
},
|
|
122
134
|
};
|
|
123
135
|
} else if (provider === 'anthropic') {
|
|
124
|
-
// Anthropic format: uses
|
|
136
|
+
// Anthropic official format: uses 'data' key
|
|
125
137
|
return {
|
|
126
138
|
type: 'image',
|
|
127
139
|
source: {
|
|
@@ -130,29 +142,43 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
|
|
|
130
142
|
data: imageData.base64,
|
|
131
143
|
},
|
|
132
144
|
};
|
|
145
|
+
} else if (provider === 'scrypted') {
|
|
146
|
+
// @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
|
|
147
|
+
return {
|
|
148
|
+
type: 'image',
|
|
149
|
+
source: {
|
|
150
|
+
type: 'base64',
|
|
151
|
+
media_type: imageData.mediaType,
|
|
152
|
+
base64: imageData.base64,
|
|
153
|
+
},
|
|
154
|
+
};
|
|
133
155
|
} else {
|
|
134
|
-
// Unknown provider: try
|
|
135
|
-
// Some plugins may translate this to OpenAI format internally
|
|
156
|
+
// Unknown provider: try @scrypted/llm format first
|
|
136
157
|
return {
|
|
137
158
|
type: 'image',
|
|
138
159
|
source: {
|
|
139
160
|
type: 'base64',
|
|
140
161
|
media_type: imageData.mediaType,
|
|
141
|
-
|
|
162
|
+
base64: imageData.base64,
|
|
142
163
|
},
|
|
143
164
|
};
|
|
144
165
|
}
|
|
145
166
|
}
|
|
146
167
|
|
|
147
|
-
/** Check if an error indicates vision/multimodal content
|
|
148
|
-
export function
|
|
168
|
+
/** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
|
|
169
|
+
export function isVisionFormatError(error: any): boolean {
|
|
149
170
|
const errorStr = String(error);
|
|
150
171
|
return (
|
|
151
172
|
errorStr.includes('content.str') ||
|
|
152
173
|
errorStr.includes('should be a valid string') ||
|
|
153
174
|
errorStr.includes('Invalid content type') ||
|
|
154
175
|
errorStr.includes('does not support vision') ||
|
|
155
|
-
errorStr.includes('
|
|
176
|
+
errorStr.includes('invalid base64') ||
|
|
177
|
+
errorStr.includes('Invalid base64') ||
|
|
178
|
+
errorStr.includes('.image.source') ||
|
|
179
|
+
errorStr.includes('.image_url') ||
|
|
180
|
+
(errorStr.includes('image_url') && errorStr.includes('not supported')) ||
|
|
181
|
+
(errorStr.includes('400') && errorStr.includes('content'))
|
|
156
182
|
);
|
|
157
183
|
}
|
|
158
184
|
|
|
@@ -30,7 +30,7 @@ import {
|
|
|
30
30
|
Landmark,
|
|
31
31
|
findCamera,
|
|
32
32
|
} from '../models/topology';
|
|
33
|
-
import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider,
|
|
33
|
+
import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider, isVisionFormatError } from './spatial-reasoning';
|
|
34
34
|
|
|
35
35
|
const { systemManager } = sdk;
|
|
36
36
|
|
|
@@ -253,17 +253,23 @@ export class TopologyDiscoveryEngine {
|
|
|
253
253
|
return analysis;
|
|
254
254
|
}
|
|
255
255
|
|
|
256
|
-
// Try with detected provider format first, then fallback to
|
|
257
|
-
|
|
256
|
+
// Try with detected provider format first, then fallback to alternates
|
|
257
|
+
// The order matters: try the most likely formats first
|
|
258
|
+
const formatsToTry: LlmProvider[] = [];
|
|
258
259
|
|
|
259
|
-
//
|
|
260
|
+
// Start with detected format
|
|
261
|
+
formatsToTry.push(this.llmProviderType);
|
|
262
|
+
|
|
263
|
+
// Add fallbacks based on detected provider
|
|
260
264
|
if (this.llmProviderType === 'openai') {
|
|
261
|
-
formatsToTry.push('anthropic');
|
|
265
|
+
formatsToTry.push('scrypted', 'anthropic');
|
|
262
266
|
} else if (this.llmProviderType === 'anthropic') {
|
|
263
|
-
formatsToTry.push('openai');
|
|
267
|
+
formatsToTry.push('scrypted', 'openai');
|
|
268
|
+
} else if (this.llmProviderType === 'scrypted') {
|
|
269
|
+
formatsToTry.push('anthropic', 'openai');
|
|
264
270
|
} else {
|
|
265
|
-
// Unknown - try
|
|
266
|
-
formatsToTry.push('openai');
|
|
271
|
+
// Unknown - try all formats
|
|
272
|
+
formatsToTry.push('scrypted', 'anthropic', 'openai');
|
|
267
273
|
}
|
|
268
274
|
|
|
269
275
|
let lastError: any = null;
|
|
@@ -353,8 +359,8 @@ export class TopologyDiscoveryEngine {
|
|
|
353
359
|
lastError = e;
|
|
354
360
|
|
|
355
361
|
// Check if this is a vision/multimodal format error
|
|
356
|
-
if (
|
|
357
|
-
this.console.warn(`[Discovery] ${formatType} format
|
|
362
|
+
if (isVisionFormatError(e)) {
|
|
363
|
+
this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
|
|
358
364
|
continue; // Try next format
|
|
359
365
|
}
|
|
360
366
|
|
|
@@ -367,8 +373,8 @@ export class TopologyDiscoveryEngine {
|
|
|
367
373
|
// All formats failed
|
|
368
374
|
if (lastError) {
|
|
369
375
|
const errorStr = String(lastError);
|
|
370
|
-
if (
|
|
371
|
-
analysis.error = 'Vision/image analysis
|
|
376
|
+
if (isVisionFormatError(lastError)) {
|
|
377
|
+
analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
|
|
372
378
|
} else {
|
|
373
379
|
analysis.error = `Analysis failed: ${errorStr}`;
|
|
374
380
|
}
|