@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +73 -24
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +49 -12
- package/src/core/topology-discovery.ts +35 -13
package/out/plugin.zip
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
package/src/core/spatial-reasoning.ts
CHANGED
|
@@ -84,35 +84,58 @@ export interface ImageData {
|
|
|
84
84
|
*/
|
|
85
85
|
export async function mediaObjectToBase64(mediaObject: MediaObject): Promise<ImageData | null> {
|
|
86
86
|
try {
|
|
87
|
+
console.log(`[Image] Converting MediaObject, mimeType=${mediaObject?.mimeType}`);
|
|
88
|
+
|
|
87
89
|
// Convert MediaObject to Buffer using mediaManager
|
|
88
90
|
const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, ScryptedMimeTypes.Image);
|
|
89
91
|
|
|
92
|
+
if (!buffer) {
|
|
93
|
+
console.warn('[Image] convertMediaObjectToBuffer returned null/undefined');
|
|
94
|
+
return null;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
console.log(`[Image] Buffer received: ${buffer.length} bytes`);
|
|
98
|
+
|
|
99
|
+
if (buffer.length === 0) {
|
|
100
|
+
console.warn('[Image] Buffer is empty (0 bytes)');
|
|
101
|
+
return null;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// Check if buffer is too small to be a valid image (< 1KB is suspicious)
|
|
105
|
+
if (buffer.length < 1000) {
|
|
106
|
+
// Log what the buffer contains - might be an error message
|
|
107
|
+
const bufferContent = buffer.toString('utf8').substring(0, 100);
|
|
108
|
+
console.warn(`[Image] Buffer too small (${buffer.length} bytes), content: ${bufferContent}`);
|
|
109
|
+
return null;
|
|
110
|
+
}
|
|
111
|
+
|
|
90
112
|
// Convert buffer to base64 (raw, no data URL prefix)
|
|
91
113
|
const base64 = buffer.toString('base64');
|
|
92
114
|
|
|
93
115
|
// Determine MIME type - default to JPEG for camera images
|
|
94
116
|
const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
|
|
95
117
|
|
|
118
|
+
console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
|
|
119
|
+
|
|
96
120
|
return { base64, mediaType };
|
|
97
121
|
} catch (e) {
|
|
98
|
-
console.warn('Failed to convert MediaObject to base64:', e);
|
|
122
|
+
console.warn('[Image] Failed to convert MediaObject to base64:', e);
|
|
99
123
|
return null;
|
|
100
124
|
}
|
|
101
125
|
}
|
|
102
126
|
|
|
103
127
|
/** LLM Provider type for image format selection */
|
|
104
|
-
export type LlmProvider = 'openai' | 'anthropic' | 'unknown';
|
|
128
|
+
export type LlmProvider = 'openai' | 'anthropic' | 'scrypted' | 'unknown';
|
|
105
129
|
|
|
106
130
|
/**
|
|
107
131
|
* Build image content block for ChatCompletion API
|
|
108
|
-
* Supports
|
|
132
|
+
* Supports OpenAI, Anthropic, and @scrypted/llm formats
|
|
109
133
|
* @param imageData - Image data with base64 and media type
|
|
110
|
-
* @param provider - The LLM provider type
|
|
134
|
+
* @param provider - The LLM provider type
|
|
111
135
|
*/
|
|
112
136
|
export function buildImageContent(imageData: ImageData, provider: LlmProvider = 'unknown'): any {
|
|
113
137
|
if (provider === 'openai') {
|
|
114
138
|
// OpenAI format: uses data URL with image_url wrapper
|
|
115
|
-
// Include detail parameter for compatibility
|
|
116
139
|
return {
|
|
117
140
|
type: 'image_url',
|
|
118
141
|
image_url: {
|
|
@@ -121,7 +144,7 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
|
|
|
121
144
|
},
|
|
122
145
|
};
|
|
123
146
|
} else if (provider === 'anthropic') {
|
|
124
|
-
// Anthropic format: uses
|
|
147
|
+
// Anthropic official format: uses 'data' key
|
|
125
148
|
return {
|
|
126
149
|
type: 'image',
|
|
127
150
|
source: {
|
|
@@ -130,29 +153,43 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
|
|
|
130
153
|
data: imageData.base64,
|
|
131
154
|
},
|
|
132
155
|
};
|
|
156
|
+
} else if (provider === 'scrypted') {
|
|
157
|
+
// @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
|
|
158
|
+
return {
|
|
159
|
+
type: 'image',
|
|
160
|
+
source: {
|
|
161
|
+
type: 'base64',
|
|
162
|
+
media_type: imageData.mediaType,
|
|
163
|
+
base64: imageData.base64,
|
|
164
|
+
},
|
|
165
|
+
};
|
|
133
166
|
} else {
|
|
134
|
-
// Unknown provider: try
|
|
135
|
-
// Some plugins may translate this to OpenAI format internally
|
|
167
|
+
// Unknown provider: try @scrypted/llm format first
|
|
136
168
|
return {
|
|
137
169
|
type: 'image',
|
|
138
170
|
source: {
|
|
139
171
|
type: 'base64',
|
|
140
172
|
media_type: imageData.mediaType,
|
|
141
|
-
|
|
173
|
+
base64: imageData.base64,
|
|
142
174
|
},
|
|
143
175
|
};
|
|
144
176
|
}
|
|
145
177
|
}
|
|
146
178
|
|
|
147
|
-
/** Check if an error indicates vision/multimodal content
|
|
148
|
-
export function
|
|
179
|
+
/** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
|
|
180
|
+
export function isVisionFormatError(error: any): boolean {
|
|
149
181
|
const errorStr = String(error);
|
|
150
182
|
return (
|
|
151
183
|
errorStr.includes('content.str') ||
|
|
152
184
|
errorStr.includes('should be a valid string') ||
|
|
153
185
|
errorStr.includes('Invalid content type') ||
|
|
154
186
|
errorStr.includes('does not support vision') ||
|
|
155
|
-
errorStr.includes('
|
|
187
|
+
errorStr.includes('invalid base64') ||
|
|
188
|
+
errorStr.includes('Invalid base64') ||
|
|
189
|
+
errorStr.includes('.image.source') ||
|
|
190
|
+
errorStr.includes('.image_url') ||
|
|
191
|
+
(errorStr.includes('image_url') && errorStr.includes('not supported')) ||
|
|
192
|
+
(errorStr.includes('400') && errorStr.includes('content'))
|
|
156
193
|
);
|
|
157
194
|
}
|
|
158
195
|
|
|
package/src/core/topology-discovery.ts
CHANGED
|
@@ -30,7 +30,7 @@ import {
|
|
|
30
30
|
Landmark,
|
|
31
31
|
findCamera,
|
|
32
32
|
} from '../models/topology';
|
|
33
|
-
import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider,
|
|
33
|
+
import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider, isVisionFormatError } from './spatial-reasoning';
|
|
34
34
|
|
|
35
35
|
const { systemManager } = sdk;
|
|
36
36
|
|
|
@@ -213,11 +213,27 @@ export class TopologyDiscoveryEngine {
|
|
|
213
213
|
try {
|
|
214
214
|
const camera = systemManager.getDeviceById<Camera>(cameraId);
|
|
215
215
|
if (!camera?.interfaces?.includes(ScryptedInterface.Camera)) {
|
|
216
|
+
this.console.warn(`[Discovery] Camera ${cameraId} doesn't have Camera interface`);
|
|
216
217
|
return null;
|
|
217
218
|
}
|
|
218
219
|
|
|
220
|
+
this.console.log(`[Discovery] Taking snapshot from camera: ${camera.name || cameraId}`);
|
|
219
221
|
const mediaObject = await camera.takePicture();
|
|
220
|
-
|
|
222
|
+
|
|
223
|
+
if (!mediaObject) {
|
|
224
|
+
this.console.warn(`[Discovery] takePicture() returned null for ${camera.name}`);
|
|
225
|
+
return null;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
this.console.log(`[Discovery] MediaObject received: mimeType=${mediaObject.mimeType}`);
|
|
229
|
+
|
|
230
|
+
const imageData = await mediaObjectToBase64(mediaObject);
|
|
231
|
+
|
|
232
|
+
if (!imageData) {
|
|
233
|
+
this.console.warn(`[Discovery] Failed to convert MediaObject to base64 for ${camera.name}`);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
return imageData;
|
|
221
237
|
} catch (e) {
|
|
222
238
|
this.console.warn(`[Discovery] Failed to get snapshot from camera ${cameraId}:`, e);
|
|
223
239
|
return null;
|
|
@@ -253,17 +269,23 @@ export class TopologyDiscoveryEngine {
|
|
|
253
269
|
return analysis;
|
|
254
270
|
}
|
|
255
271
|
|
|
256
|
-
// Try with detected provider format first, then fallback to
|
|
257
|
-
|
|
272
|
+
// Try with detected provider format first, then fallback to alternates
|
|
273
|
+
// The order matters: try the most likely formats first
|
|
274
|
+
const formatsToTry: LlmProvider[] = [];
|
|
275
|
+
|
|
276
|
+
// Start with detected format
|
|
277
|
+
formatsToTry.push(this.llmProviderType);
|
|
258
278
|
|
|
259
|
-
// Add
|
|
279
|
+
// Add fallbacks based on detected provider
|
|
260
280
|
if (this.llmProviderType === 'openai') {
|
|
261
|
-
formatsToTry.push('anthropic');
|
|
281
|
+
formatsToTry.push('scrypted', 'anthropic');
|
|
262
282
|
} else if (this.llmProviderType === 'anthropic') {
|
|
263
|
-
formatsToTry.push('openai');
|
|
283
|
+
formatsToTry.push('scrypted', 'openai');
|
|
284
|
+
} else if (this.llmProviderType === 'scrypted') {
|
|
285
|
+
formatsToTry.push('anthropic', 'openai');
|
|
264
286
|
} else {
|
|
265
|
-
// Unknown - try
|
|
266
|
-
formatsToTry.push('openai');
|
|
287
|
+
// Unknown - try all formats
|
|
288
|
+
formatsToTry.push('scrypted', 'anthropic', 'openai');
|
|
267
289
|
}
|
|
268
290
|
|
|
269
291
|
let lastError: any = null;
|
|
@@ -353,8 +375,8 @@ export class TopologyDiscoveryEngine {
|
|
|
353
375
|
lastError = e;
|
|
354
376
|
|
|
355
377
|
// Check if this is a vision/multimodal format error
|
|
356
|
-
if (
|
|
357
|
-
this.console.warn(`[Discovery] ${formatType} format
|
|
378
|
+
if (isVisionFormatError(e)) {
|
|
379
|
+
this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
|
|
358
380
|
continue; // Try next format
|
|
359
381
|
}
|
|
360
382
|
|
|
@@ -367,8 +389,8 @@ export class TopologyDiscoveryEngine {
|
|
|
367
389
|
// All formats failed
|
|
368
390
|
if (lastError) {
|
|
369
391
|
const errorStr = String(lastError);
|
|
370
|
-
if (
|
|
371
|
-
analysis.error = 'Vision/image analysis
|
|
392
|
+
if (isVisionFormatError(lastError)) {
|
|
393
|
+
analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
|
|
372
394
|
} else {
|
|
373
395
|
analysis.error = `Analysis failed: ${errorStr}`;
|
|
374
396
|
}
|