@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +73 -24
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +49 -12
- package/src/core/topology-discovery.ts +35 -13
package/dist/plugin.zip
CHANGED
|
Binary file
|
package/out/main.nodejs.js
CHANGED
|
@@ -35095,7 +35095,7 @@ Object.defineProperty(exports, "__esModule", ({ value: true }));
|
|
|
35095
35095
|
exports.SpatialReasoningEngine = void 0;
|
|
35096
35096
|
exports.mediaObjectToBase64 = mediaObjectToBase64;
|
|
35097
35097
|
exports.buildImageContent = buildImageContent;
|
|
35098
|
-
exports.
|
|
35098
|
+
exports.isVisionFormatError = isVisionFormatError;
|
|
35099
35099
|
const sdk_1 = __importStar(__webpack_require__(/*! @scrypted/sdk */ "./node_modules/@scrypted/sdk/dist/src/index.js"));
|
|
35100
35100
|
const topology_1 = __webpack_require__(/*! ../models/topology */ "./src/models/topology.ts");
|
|
35101
35101
|
const { systemManager, mediaManager } = sdk_1.default;
|
|
@@ -35106,29 +35106,46 @@ const { systemManager, mediaManager } = sdk_1.default;
|
|
|
35106
35106
|
*/
|
|
35107
35107
|
async function mediaObjectToBase64(mediaObject) {
|
|
35108
35108
|
try {
|
|
35109
|
+
console.log(`[Image] Converting MediaObject, mimeType=${mediaObject?.mimeType}`);
|
|
35109
35110
|
// Convert MediaObject to Buffer using mediaManager
|
|
35110
35111
|
const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, sdk_1.ScryptedMimeTypes.Image);
|
|
35112
|
+
if (!buffer) {
|
|
35113
|
+
console.warn('[Image] convertMediaObjectToBuffer returned null/undefined');
|
|
35114
|
+
return null;
|
|
35115
|
+
}
|
|
35116
|
+
console.log(`[Image] Buffer received: ${buffer.length} bytes`);
|
|
35117
|
+
if (buffer.length === 0) {
|
|
35118
|
+
console.warn('[Image] Buffer is empty (0 bytes)');
|
|
35119
|
+
return null;
|
|
35120
|
+
}
|
|
35121
|
+
// Check if buffer is too small to be a valid image (< 1KB is suspicious)
|
|
35122
|
+
if (buffer.length < 1000) {
|
|
35123
|
+
// Log what the buffer contains - might be an error message
|
|
35124
|
+
const bufferContent = buffer.toString('utf8').substring(0, 100);
|
|
35125
|
+
console.warn(`[Image] Buffer too small (${buffer.length} bytes), content: ${bufferContent}`);
|
|
35126
|
+
return null;
|
|
35127
|
+
}
|
|
35111
35128
|
// Convert buffer to base64 (raw, no data URL prefix)
|
|
35112
35129
|
const base64 = buffer.toString('base64');
|
|
35113
35130
|
// Determine MIME type - default to JPEG for camera images
|
|
35114
35131
|
const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
|
|
35132
|
+
console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
|
|
35115
35133
|
return { base64, mediaType };
|
|
35116
35134
|
}
|
|
35117
35135
|
catch (e) {
|
|
35118
|
-
console.warn('Failed to convert MediaObject to base64:', e);
|
|
35136
|
+
console.warn('[Image] Failed to convert MediaObject to base64:', e);
|
|
35119
35137
|
return null;
|
|
35120
35138
|
}
|
|
35121
35139
|
}
|
|
35122
35140
|
/**
|
|
35123
35141
|
* Build image content block for ChatCompletion API
|
|
35124
|
-
* Supports
|
|
35142
|
+
* Supports OpenAI, Anthropic, and @scrypted/llm formats
|
|
35125
35143
|
* @param imageData - Image data with base64 and media type
|
|
35126
|
-
* @param provider - The LLM provider type
|
|
35144
|
+
* @param provider - The LLM provider type
|
|
35127
35145
|
*/
|
|
35128
35146
|
function buildImageContent(imageData, provider = 'unknown') {
|
|
35129
35147
|
if (provider === 'openai') {
|
|
35130
35148
|
// OpenAI format: uses data URL with image_url wrapper
|
|
35131
|
-
// Include detail parameter for compatibility
|
|
35132
35149
|
return {
|
|
35133
35150
|
type: 'image_url',
|
|
35134
35151
|
image_url: {
|
|
@@ -35138,7 +35155,7 @@ function buildImageContent(imageData, provider = 'unknown') {
|
|
|
35138
35155
|
};
|
|
35139
35156
|
}
|
|
35140
35157
|
else if (provider === 'anthropic') {
|
|
35141
|
-
// Anthropic format: uses
|
|
35158
|
+
// Anthropic official format: uses 'data' key
|
|
35142
35159
|
return {
|
|
35143
35160
|
type: 'image',
|
|
35144
35161
|
source: {
|
|
@@ -35148,27 +35165,42 @@ function buildImageContent(imageData, provider = 'unknown') {
|
|
|
35148
35165
|
},
|
|
35149
35166
|
};
|
|
35150
35167
|
}
|
|
35168
|
+
else if (provider === 'scrypted') {
|
|
35169
|
+
// @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
|
|
35170
|
+
return {
|
|
35171
|
+
type: 'image',
|
|
35172
|
+
source: {
|
|
35173
|
+
type: 'base64',
|
|
35174
|
+
media_type: imageData.mediaType,
|
|
35175
|
+
base64: imageData.base64,
|
|
35176
|
+
},
|
|
35177
|
+
};
|
|
35178
|
+
}
|
|
35151
35179
|
else {
|
|
35152
|
-
// Unknown provider: try
|
|
35153
|
-
// Some plugins may translate this to OpenAI format internally
|
|
35180
|
+
// Unknown provider: try @scrypted/llm format first
|
|
35154
35181
|
return {
|
|
35155
35182
|
type: 'image',
|
|
35156
35183
|
source: {
|
|
35157
35184
|
type: 'base64',
|
|
35158
35185
|
media_type: imageData.mediaType,
|
|
35159
|
-
|
|
35186
|
+
base64: imageData.base64,
|
|
35160
35187
|
},
|
|
35161
35188
|
};
|
|
35162
35189
|
}
|
|
35163
35190
|
}
|
|
35164
|
-
/** Check if an error indicates vision/multimodal content
|
|
35165
|
-
function
|
|
35191
|
+
/** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
|
|
35192
|
+
function isVisionFormatError(error) {
|
|
35166
35193
|
const errorStr = String(error);
|
|
35167
35194
|
return (errorStr.includes('content.str') ||
|
|
35168
35195
|
errorStr.includes('should be a valid string') ||
|
|
35169
35196
|
errorStr.includes('Invalid content type') ||
|
|
35170
35197
|
errorStr.includes('does not support vision') ||
|
|
35171
|
-
errorStr.includes('
|
|
35198
|
+
errorStr.includes('invalid base64') ||
|
|
35199
|
+
errorStr.includes('Invalid base64') ||
|
|
35200
|
+
errorStr.includes('.image.source') ||
|
|
35201
|
+
errorStr.includes('.image_url') ||
|
|
35202
|
+
(errorStr.includes('image_url') && errorStr.includes('not supported')) ||
|
|
35203
|
+
(errorStr.includes('400') && errorStr.includes('content')));
|
|
35172
35204
|
}
|
|
35173
35205
|
class SpatialReasoningEngine {
|
|
35174
35206
|
config;
|
|
@@ -36165,10 +36197,21 @@ class TopologyDiscoveryEngine {
|
|
|
36165
36197
|
try {
|
|
36166
36198
|
const camera = systemManager.getDeviceById(cameraId);
|
|
36167
36199
|
if (!camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
|
|
36200
|
+
this.console.warn(`[Discovery] Camera ${cameraId} doesn't have Camera interface`);
|
|
36168
36201
|
return null;
|
|
36169
36202
|
}
|
|
36203
|
+
this.console.log(`[Discovery] Taking snapshot from camera: ${camera.name || cameraId}`);
|
|
36170
36204
|
const mediaObject = await camera.takePicture();
|
|
36171
|
-
|
|
36205
|
+
if (!mediaObject) {
|
|
36206
|
+
this.console.warn(`[Discovery] takePicture() returned null for ${camera.name}`);
|
|
36207
|
+
return null;
|
|
36208
|
+
}
|
|
36209
|
+
this.console.log(`[Discovery] MediaObject received: mimeType=${mediaObject.mimeType}`);
|
|
36210
|
+
const imageData = await (0, spatial_reasoning_1.mediaObjectToBase64)(mediaObject);
|
|
36211
|
+
if (!imageData) {
|
|
36212
|
+
this.console.warn(`[Discovery] Failed to convert MediaObject to base64 for ${camera.name}`);
|
|
36213
|
+
}
|
|
36214
|
+
return imageData;
|
|
36172
36215
|
}
|
|
36173
36216
|
catch (e) {
|
|
36174
36217
|
this.console.warn(`[Discovery] Failed to get snapshot from camera ${cameraId}:`, e);
|
|
@@ -36200,18 +36243,24 @@ class TopologyDiscoveryEngine {
|
|
|
36200
36243
|
analysis.error = 'Failed to capture camera snapshot';
|
|
36201
36244
|
return analysis;
|
|
36202
36245
|
}
|
|
36203
|
-
// Try with detected provider format first, then fallback to
|
|
36204
|
-
|
|
36205
|
-
|
|
36246
|
+
// Try with detected provider format first, then fallback to alternates
|
|
36247
|
+
// The order matters: try the most likely formats first
|
|
36248
|
+
const formatsToTry = [];
|
|
36249
|
+
// Start with detected format
|
|
36250
|
+
formatsToTry.push(this.llmProviderType);
|
|
36251
|
+
// Add fallbacks based on detected provider
|
|
36206
36252
|
if (this.llmProviderType === 'openai') {
|
|
36207
|
-
formatsToTry.push('anthropic');
|
|
36253
|
+
formatsToTry.push('scrypted', 'anthropic');
|
|
36208
36254
|
}
|
|
36209
36255
|
else if (this.llmProviderType === 'anthropic') {
|
|
36210
|
-
formatsToTry.push('openai');
|
|
36256
|
+
formatsToTry.push('scrypted', 'openai');
|
|
36257
|
+
}
|
|
36258
|
+
else if (this.llmProviderType === 'scrypted') {
|
|
36259
|
+
formatsToTry.push('anthropic', 'openai');
|
|
36211
36260
|
}
|
|
36212
36261
|
else {
|
|
36213
|
-
// Unknown - try
|
|
36214
|
-
formatsToTry.push('openai');
|
|
36262
|
+
// Unknown - try all formats
|
|
36263
|
+
formatsToTry.push('scrypted', 'anthropic', 'openai');
|
|
36215
36264
|
}
|
|
36216
36265
|
let lastError = null;
|
|
36217
36266
|
for (const formatType of formatsToTry) {
|
|
@@ -36290,8 +36339,8 @@ class TopologyDiscoveryEngine {
|
|
|
36290
36339
|
catch (e) {
|
|
36291
36340
|
lastError = e;
|
|
36292
36341
|
// Check if this is a vision/multimodal format error
|
|
36293
|
-
if ((0, spatial_reasoning_1.
|
|
36294
|
-
this.console.warn(`[Discovery] ${formatType} format
|
|
36342
|
+
if ((0, spatial_reasoning_1.isVisionFormatError)(e)) {
|
|
36343
|
+
this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
|
|
36295
36344
|
continue; // Try next format
|
|
36296
36345
|
}
|
|
36297
36346
|
// Not a format error - don't retry
|
|
@@ -36302,8 +36351,8 @@ class TopologyDiscoveryEngine {
|
|
|
36302
36351
|
// All formats failed
|
|
36303
36352
|
if (lastError) {
|
|
36304
36353
|
const errorStr = String(lastError);
|
|
36305
|
-
if ((0, spatial_reasoning_1.
|
|
36306
|
-
analysis.error = 'Vision/image analysis
|
|
36354
|
+
if ((0, spatial_reasoning_1.isVisionFormatError)(lastError)) {
|
|
36355
|
+
analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
|
|
36307
36356
|
}
|
|
36308
36357
|
else {
|
|
36309
36358
|
analysis.error = `Analysis failed: ${errorStr}`;
|