@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +52 -22
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +37 -11
- package/src/core/topology-discovery.ts +18 -12
package/dist/plugin.zip
CHANGED
|
Binary file
|
package/out/main.nodejs.js
CHANGED
|
@@ -35095,7 +35095,7 @@ Object.defineProperty(exports, "__esModule", ({ value: true }));
|
|
|
35095
35095
|
exports.SpatialReasoningEngine = void 0;
|
|
35096
35096
|
exports.mediaObjectToBase64 = mediaObjectToBase64;
|
|
35097
35097
|
exports.buildImageContent = buildImageContent;
|
|
35098
|
-
exports.
|
|
35098
|
+
exports.isVisionFormatError = isVisionFormatError;
|
|
35099
35099
|
const sdk_1 = __importStar(__webpack_require__(/*! @scrypted/sdk */ "./node_modules/@scrypted/sdk/dist/src/index.js"));
|
|
35100
35100
|
const topology_1 = __webpack_require__(/*! ../models/topology */ "./src/models/topology.ts");
|
|
35101
35101
|
const { systemManager, mediaManager } = sdk_1.default;
|
|
@@ -35108,10 +35108,20 @@ async function mediaObjectToBase64(mediaObject) {
|
|
|
35108
35108
|
try {
|
|
35109
35109
|
// Convert MediaObject to Buffer using mediaManager
|
|
35110
35110
|
const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, sdk_1.ScryptedMimeTypes.Image);
|
|
35111
|
+
if (!buffer || buffer.length === 0) {
|
|
35112
|
+
console.warn('Failed to convert MediaObject: empty buffer');
|
|
35113
|
+
return null;
|
|
35114
|
+
}
|
|
35111
35115
|
// Convert buffer to base64 (raw, no data URL prefix)
|
|
35112
35116
|
const base64 = buffer.toString('base64');
|
|
35117
|
+
// Validate base64 - check it's not empty and looks valid
|
|
35118
|
+
if (!base64 || base64.length < 100) {
|
|
35119
|
+
console.warn(`Invalid base64: length=${base64?.length || 0}`);
|
|
35120
|
+
return null;
|
|
35121
|
+
}
|
|
35113
35122
|
// Determine MIME type - default to JPEG for camera images
|
|
35114
35123
|
const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
|
|
35124
|
+
console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
|
|
35115
35125
|
return { base64, mediaType };
|
|
35116
35126
|
}
|
|
35117
35127
|
catch (e) {
|
|
@@ -35121,14 +35131,13 @@ async function mediaObjectToBase64(mediaObject) {
|
|
|
35121
35131
|
}
|
|
35122
35132
|
/**
|
|
35123
35133
|
* Build image content block for ChatCompletion API
|
|
35124
|
-
* Supports
|
|
35134
|
+
* Supports OpenAI, Anthropic, and @scrypted/llm formats
|
|
35125
35135
|
* @param imageData - Image data with base64 and media type
|
|
35126
|
-
* @param provider - The LLM provider type
|
|
35136
|
+
* @param provider - The LLM provider type
|
|
35127
35137
|
*/
|
|
35128
35138
|
function buildImageContent(imageData, provider = 'unknown') {
|
|
35129
35139
|
if (provider === 'openai') {
|
|
35130
35140
|
// OpenAI format: uses data URL with image_url wrapper
|
|
35131
|
-
// Include detail parameter for compatibility
|
|
35132
35141
|
return {
|
|
35133
35142
|
type: 'image_url',
|
|
35134
35143
|
image_url: {
|
|
@@ -35138,7 +35147,7 @@ function buildImageContent(imageData, provider = 'unknown') {
|
|
|
35138
35147
|
};
|
|
35139
35148
|
}
|
|
35140
35149
|
else if (provider === 'anthropic') {
|
|
35141
|
-
// Anthropic format: uses
|
|
35150
|
+
// Anthropic official format: uses 'data' key
|
|
35142
35151
|
return {
|
|
35143
35152
|
type: 'image',
|
|
35144
35153
|
source: {
|
|
@@ -35148,27 +35157,42 @@ function buildImageContent(imageData, provider = 'unknown') {
|
|
|
35148
35157
|
},
|
|
35149
35158
|
};
|
|
35150
35159
|
}
|
|
35160
|
+
else if (provider === 'scrypted') {
|
|
35161
|
+
// @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
|
|
35162
|
+
return {
|
|
35163
|
+
type: 'image',
|
|
35164
|
+
source: {
|
|
35165
|
+
type: 'base64',
|
|
35166
|
+
media_type: imageData.mediaType,
|
|
35167
|
+
base64: imageData.base64,
|
|
35168
|
+
},
|
|
35169
|
+
};
|
|
35170
|
+
}
|
|
35151
35171
|
else {
|
|
35152
|
-
// Unknown provider: try
|
|
35153
|
-
// Some plugins may translate this to OpenAI format internally
|
|
35172
|
+
// Unknown provider: try @scrypted/llm format first
|
|
35154
35173
|
return {
|
|
35155
35174
|
type: 'image',
|
|
35156
35175
|
source: {
|
|
35157
35176
|
type: 'base64',
|
|
35158
35177
|
media_type: imageData.mediaType,
|
|
35159
|
-
|
|
35178
|
+
base64: imageData.base64,
|
|
35160
35179
|
},
|
|
35161
35180
|
};
|
|
35162
35181
|
}
|
|
35163
35182
|
}
|
|
35164
|
-
/** Check if an error indicates vision/multimodal content
|
|
35165
|
-
function
|
|
35183
|
+
/** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
|
|
35184
|
+
function isVisionFormatError(error) {
|
|
35166
35185
|
const errorStr = String(error);
|
|
35167
35186
|
return (errorStr.includes('content.str') ||
|
|
35168
35187
|
errorStr.includes('should be a valid string') ||
|
|
35169
35188
|
errorStr.includes('Invalid content type') ||
|
|
35170
35189
|
errorStr.includes('does not support vision') ||
|
|
35171
|
-
errorStr.includes('
|
|
35190
|
+
errorStr.includes('invalid base64') ||
|
|
35191
|
+
errorStr.includes('Invalid base64') ||
|
|
35192
|
+
errorStr.includes('.image.source') ||
|
|
35193
|
+
errorStr.includes('.image_url') ||
|
|
35194
|
+
(errorStr.includes('image_url') && errorStr.includes('not supported')) ||
|
|
35195
|
+
(errorStr.includes('400') && errorStr.includes('content')));
|
|
35172
35196
|
}
|
|
35173
35197
|
class SpatialReasoningEngine {
|
|
35174
35198
|
config;
|
|
@@ -36200,18 +36224,24 @@ class TopologyDiscoveryEngine {
|
|
|
36200
36224
|
analysis.error = 'Failed to capture camera snapshot';
|
|
36201
36225
|
return analysis;
|
|
36202
36226
|
}
|
|
36203
|
-
// Try with detected provider format first, then fallback to
|
|
36204
|
-
|
|
36205
|
-
|
|
36227
|
+
// Try with detected provider format first, then fallback to alternates
|
|
36228
|
+
// The order matters: try the most likely formats first
|
|
36229
|
+
const formatsToTry = [];
|
|
36230
|
+
// Start with detected format
|
|
36231
|
+
formatsToTry.push(this.llmProviderType);
|
|
36232
|
+
// Add fallbacks based on detected provider
|
|
36206
36233
|
if (this.llmProviderType === 'openai') {
|
|
36207
|
-
formatsToTry.push('anthropic');
|
|
36234
|
+
formatsToTry.push('scrypted', 'anthropic');
|
|
36208
36235
|
}
|
|
36209
36236
|
else if (this.llmProviderType === 'anthropic') {
|
|
36210
|
-
formatsToTry.push('openai');
|
|
36237
|
+
formatsToTry.push('scrypted', 'openai');
|
|
36238
|
+
}
|
|
36239
|
+
else if (this.llmProviderType === 'scrypted') {
|
|
36240
|
+
formatsToTry.push('anthropic', 'openai');
|
|
36211
36241
|
}
|
|
36212
36242
|
else {
|
|
36213
|
-
// Unknown - try
|
|
36214
|
-
formatsToTry.push('openai');
|
|
36243
|
+
// Unknown - try all formats
|
|
36244
|
+
formatsToTry.push('scrypted', 'anthropic', 'openai');
|
|
36215
36245
|
}
|
|
36216
36246
|
let lastError = null;
|
|
36217
36247
|
for (const formatType of formatsToTry) {
|
|
@@ -36290,8 +36320,8 @@ class TopologyDiscoveryEngine {
|
|
|
36290
36320
|
catch (e) {
|
|
36291
36321
|
lastError = e;
|
|
36292
36322
|
// Check if this is a vision/multimodal format error
|
|
36293
|
-
if ((0, spatial_reasoning_1.
|
|
36294
|
-
this.console.warn(`[Discovery] ${formatType} format
|
|
36323
|
+
if ((0, spatial_reasoning_1.isVisionFormatError)(e)) {
|
|
36324
|
+
this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
|
|
36295
36325
|
continue; // Try next format
|
|
36296
36326
|
}
|
|
36297
36327
|
// Not a format error - don't retry
|
|
@@ -36302,8 +36332,8 @@ class TopologyDiscoveryEngine {
|
|
|
36302
36332
|
// All formats failed
|
|
36303
36333
|
if (lastError) {
|
|
36304
36334
|
const errorStr = String(lastError);
|
|
36305
|
-
if ((0, spatial_reasoning_1.
|
|
36306
|
-
analysis.error = 'Vision/image analysis
|
|
36335
|
+
if ((0, spatial_reasoning_1.isVisionFormatError)(lastError)) {
|
|
36336
|
+
analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
|
|
36307
36337
|
}
|
|
36308
36338
|
else {
|
|
36309
36339
|
analysis.error = `Analysis failed: ${errorStr}`;
|