@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.zip CHANGED
Binary file
@@ -35095,7 +35095,7 @@ Object.defineProperty(exports, "__esModule", ({ value: true }));
35095
35095
  exports.SpatialReasoningEngine = void 0;
35096
35096
  exports.mediaObjectToBase64 = mediaObjectToBase64;
35097
35097
  exports.buildImageContent = buildImageContent;
35098
- exports.isVisionNotSupportedError = isVisionNotSupportedError;
35098
+ exports.isVisionFormatError = isVisionFormatError;
35099
35099
  const sdk_1 = __importStar(__webpack_require__(/*! @scrypted/sdk */ "./node_modules/@scrypted/sdk/dist/src/index.js"));
35100
35100
  const topology_1 = __webpack_require__(/*! ../models/topology */ "./src/models/topology.ts");
35101
35101
  const { systemManager, mediaManager } = sdk_1.default;
@@ -35108,10 +35108,20 @@ async function mediaObjectToBase64(mediaObject) {
35108
35108
  try {
35109
35109
  // Convert MediaObject to Buffer using mediaManager
35110
35110
  const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, sdk_1.ScryptedMimeTypes.Image);
35111
+ if (!buffer || buffer.length === 0) {
35112
+ console.warn('Failed to convert MediaObject: empty buffer');
35113
+ return null;
35114
+ }
35111
35115
  // Convert buffer to base64 (raw, no data URL prefix)
35112
35116
  const base64 = buffer.toString('base64');
35117
+ // Validate base64 - check it's not empty and looks valid
35118
+ if (!base64 || base64.length < 100) {
35119
+ console.warn(`Invalid base64: length=${base64?.length || 0}`);
35120
+ return null;
35121
+ }
35113
35122
  // Determine MIME type - default to JPEG for camera images
35114
35123
  const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
35124
+ console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
35115
35125
  return { base64, mediaType };
35116
35126
  }
35117
35127
  catch (e) {
@@ -35121,14 +35131,13 @@ async function mediaObjectToBase64(mediaObject) {
35121
35131
  }
35122
35132
  /**
35123
35133
  * Build image content block for ChatCompletion API
35124
- * Supports both OpenAI and Anthropic formats
35134
+ * Supports OpenAI, Anthropic, and @scrypted/llm formats
35125
35135
  * @param imageData - Image data with base64 and media type
35126
- * @param provider - The LLM provider type (openai, anthropic, or unknown)
35136
+ * @param provider - The LLM provider type
35127
35137
  */
35128
35138
  function buildImageContent(imageData, provider = 'unknown') {
35129
35139
  if (provider === 'openai') {
35130
35140
  // OpenAI format: uses data URL with image_url wrapper
35131
- // Include detail parameter for compatibility
35132
35141
  return {
35133
35142
  type: 'image_url',
35134
35143
  image_url: {
@@ -35138,7 +35147,7 @@ function buildImageContent(imageData, provider = 'unknown') {
35138
35147
  };
35139
35148
  }
35140
35149
  else if (provider === 'anthropic') {
35141
- // Anthropic format: uses separate base64 data and media_type
35150
+ // Anthropic official format: uses 'data' key
35142
35151
  return {
35143
35152
  type: 'image',
35144
35153
  source: {
@@ -35148,27 +35157,42 @@ function buildImageContent(imageData, provider = 'unknown') {
35148
35157
  },
35149
35158
  };
35150
35159
  }
35160
+ else if (provider === 'scrypted') {
35161
+ // @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
35162
+ return {
35163
+ type: 'image',
35164
+ source: {
35165
+ type: 'base64',
35166
+ media_type: imageData.mediaType,
35167
+ base64: imageData.base64,
35168
+ },
35169
+ };
35170
+ }
35151
35171
  else {
35152
- // Unknown provider: try Anthropic format first as it's more explicit
35153
- // Some plugins may translate this to OpenAI format internally
35172
+ // Unknown provider: try @scrypted/llm format first
35154
35173
  return {
35155
35174
  type: 'image',
35156
35175
  source: {
35157
35176
  type: 'base64',
35158
35177
  media_type: imageData.mediaType,
35159
- data: imageData.base64,
35178
+ base64: imageData.base64,
35160
35179
  },
35161
35180
  };
35162
35181
  }
35163
35182
  }
35164
- /** Check if an error indicates vision/multimodal content is not supported */
35165
- function isVisionNotSupportedError(error) {
35183
+ /** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
35184
+ function isVisionFormatError(error) {
35166
35185
  const errorStr = String(error);
35167
35186
  return (errorStr.includes('content.str') ||
35168
35187
  errorStr.includes('should be a valid string') ||
35169
35188
  errorStr.includes('Invalid content type') ||
35170
35189
  errorStr.includes('does not support vision') ||
35171
- errorStr.includes('image_url') && errorStr.includes('not supported'));
35190
+ errorStr.includes('invalid base64') ||
35191
+ errorStr.includes('Invalid base64') ||
35192
+ errorStr.includes('.image.source') ||
35193
+ errorStr.includes('.image_url') ||
35194
+ (errorStr.includes('image_url') && errorStr.includes('not supported')) ||
35195
+ (errorStr.includes('400') && errorStr.includes('content')));
35172
35196
  }
35173
35197
  class SpatialReasoningEngine {
35174
35198
  config;
@@ -36200,18 +36224,24 @@ class TopologyDiscoveryEngine {
36200
36224
  analysis.error = 'Failed to capture camera snapshot';
36201
36225
  return analysis;
36202
36226
  }
36203
- // Try with detected provider format first, then fallback to alternate format
36204
- const formatsToTry = [this.llmProviderType];
36205
- // Add fallback format
36227
+ // Try with detected provider format first, then fallback to alternates
36228
+ // The order matters: try the most likely formats first
36229
+ const formatsToTry = [];
36230
+ // Start with detected format
36231
+ formatsToTry.push(this.llmProviderType);
36232
+ // Add fallbacks based on detected provider
36206
36233
  if (this.llmProviderType === 'openai') {
36207
- formatsToTry.push('anthropic');
36234
+ formatsToTry.push('scrypted', 'anthropic');
36208
36235
  }
36209
36236
  else if (this.llmProviderType === 'anthropic') {
36210
- formatsToTry.push('openai');
36237
+ formatsToTry.push('scrypted', 'openai');
36238
+ }
36239
+ else if (this.llmProviderType === 'scrypted') {
36240
+ formatsToTry.push('anthropic', 'openai');
36211
36241
  }
36212
36242
  else {
36213
- // Unknown - try both
36214
- formatsToTry.push('openai');
36243
+ // Unknown - try all formats
36244
+ formatsToTry.push('scrypted', 'anthropic', 'openai');
36215
36245
  }
36216
36246
  let lastError = null;
36217
36247
  for (const formatType of formatsToTry) {
@@ -36290,8 +36320,8 @@ class TopologyDiscoveryEngine {
36290
36320
  catch (e) {
36291
36321
  lastError = e;
36292
36322
  // Check if this is a vision/multimodal format error
36293
- if ((0, spatial_reasoning_1.isVisionNotSupportedError)(e)) {
36294
- this.console.warn(`[Discovery] ${formatType} format not supported, trying fallback...`);
36323
+ if ((0, spatial_reasoning_1.isVisionFormatError)(e)) {
36324
+ this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
36295
36325
  continue; // Try next format
36296
36326
  }
36297
36327
  // Not a format error - don't retry
@@ -36302,8 +36332,8 @@ class TopologyDiscoveryEngine {
36302
36332
  // All formats failed
36303
36333
  if (lastError) {
36304
36334
  const errorStr = String(lastError);
36305
- if ((0, spatial_reasoning_1.isVisionNotSupportedError)(lastError)) {
36306
- analysis.error = 'Vision/image analysis not supported by configured LLM. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured.';
36335
+ if ((0, spatial_reasoning_1.isVisionFormatError)(lastError)) {
36336
+ analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
36307
36337
  }
36308
36338
  else {
36309
36339
  analysis.error = `Analysis failed: ${errorStr}`;