@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.zip CHANGED
Binary file
@@ -35095,7 +35095,7 @@ Object.defineProperty(exports, "__esModule", ({ value: true }));
35095
35095
  exports.SpatialReasoningEngine = void 0;
35096
35096
  exports.mediaObjectToBase64 = mediaObjectToBase64;
35097
35097
  exports.buildImageContent = buildImageContent;
35098
- exports.isVisionNotSupportedError = isVisionNotSupportedError;
35098
+ exports.isVisionFormatError = isVisionFormatError;
35099
35099
  const sdk_1 = __importStar(__webpack_require__(/*! @scrypted/sdk */ "./node_modules/@scrypted/sdk/dist/src/index.js"));
35100
35100
  const topology_1 = __webpack_require__(/*! ../models/topology */ "./src/models/topology.ts");
35101
35101
  const { systemManager, mediaManager } = sdk_1.default;
@@ -35106,29 +35106,46 @@ const { systemManager, mediaManager } = sdk_1.default;
35106
35106
  */
35107
35107
  async function mediaObjectToBase64(mediaObject) {
35108
35108
  try {
35109
+ console.log(`[Image] Converting MediaObject, mimeType=${mediaObject?.mimeType}`);
35109
35110
  // Convert MediaObject to Buffer using mediaManager
35110
35111
  const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, sdk_1.ScryptedMimeTypes.Image);
35112
+ if (!buffer) {
35113
+ console.warn('[Image] convertMediaObjectToBuffer returned null/undefined');
35114
+ return null;
35115
+ }
35116
+ console.log(`[Image] Buffer received: ${buffer.length} bytes`);
35117
+ if (buffer.length === 0) {
35118
+ console.warn('[Image] Buffer is empty (0 bytes)');
35119
+ return null;
35120
+ }
35121
+ // Check if buffer is too small to be a valid image (< 1KB is suspicious)
35122
+ if (buffer.length < 1000) {
35123
+ // Log what the buffer contains - might be an error message
35124
+ const bufferContent = buffer.toString('utf8').substring(0, 100);
35125
+ console.warn(`[Image] Buffer too small (${buffer.length} bytes), content: ${bufferContent}`);
35126
+ return null;
35127
+ }
35111
35128
  // Convert buffer to base64 (raw, no data URL prefix)
35112
35129
  const base64 = buffer.toString('base64');
35113
35130
  // Determine MIME type - default to JPEG for camera images
35114
35131
  const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
35132
+ console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
35115
35133
  return { base64, mediaType };
35116
35134
  }
35117
35135
  catch (e) {
35118
- console.warn('Failed to convert MediaObject to base64:', e);
35136
+ console.warn('[Image] Failed to convert MediaObject to base64:', e);
35119
35137
  return null;
35120
35138
  }
35121
35139
  }
35122
35140
  /**
35123
35141
  * Build image content block for ChatCompletion API
35124
- * Supports both OpenAI and Anthropic formats
35142
+ * Supports OpenAI, Anthropic, and @scrypted/llm formats
35125
35143
  * @param imageData - Image data with base64 and media type
35126
- * @param provider - The LLM provider type (openai, anthropic, or unknown)
35144
+ * @param provider - The LLM provider type
35127
35145
  */
35128
35146
  function buildImageContent(imageData, provider = 'unknown') {
35129
35147
  if (provider === 'openai') {
35130
35148
  // OpenAI format: uses data URL with image_url wrapper
35131
- // Include detail parameter for compatibility
35132
35149
  return {
35133
35150
  type: 'image_url',
35134
35151
  image_url: {
@@ -35138,7 +35155,7 @@ function buildImageContent(imageData, provider = 'unknown') {
35138
35155
  };
35139
35156
  }
35140
35157
  else if (provider === 'anthropic') {
35141
- // Anthropic format: uses separate base64 data and media_type
35158
+ // Anthropic official format: uses 'data' key
35142
35159
  return {
35143
35160
  type: 'image',
35144
35161
  source: {
@@ -35148,27 +35165,42 @@ function buildImageContent(imageData, provider = 'unknown') {
35148
35165
  },
35149
35166
  };
35150
35167
  }
35168
+ else if (provider === 'scrypted') {
35169
+ // @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
35170
+ return {
35171
+ type: 'image',
35172
+ source: {
35173
+ type: 'base64',
35174
+ media_type: imageData.mediaType,
35175
+ base64: imageData.base64,
35176
+ },
35177
+ };
35178
+ }
35151
35179
  else {
35152
- // Unknown provider: try Anthropic format first as it's more explicit
35153
- // Some plugins may translate this to OpenAI format internally
35180
+ // Unknown provider: try @scrypted/llm format first
35154
35181
  return {
35155
35182
  type: 'image',
35156
35183
  source: {
35157
35184
  type: 'base64',
35158
35185
  media_type: imageData.mediaType,
35159
- data: imageData.base64,
35186
+ base64: imageData.base64,
35160
35187
  },
35161
35188
  };
35162
35189
  }
35163
35190
  }
35164
- /** Check if an error indicates vision/multimodal content is not supported */
35165
- function isVisionNotSupportedError(error) {
35191
+ /** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
35192
+ function isVisionFormatError(error) {
35166
35193
  const errorStr = String(error);
35167
35194
  return (errorStr.includes('content.str') ||
35168
35195
  errorStr.includes('should be a valid string') ||
35169
35196
  errorStr.includes('Invalid content type') ||
35170
35197
  errorStr.includes('does not support vision') ||
35171
- errorStr.includes('image_url') && errorStr.includes('not supported'));
35198
+ errorStr.includes('invalid base64') ||
35199
+ errorStr.includes('Invalid base64') ||
35200
+ errorStr.includes('.image.source') ||
35201
+ errorStr.includes('.image_url') ||
35202
+ (errorStr.includes('image_url') && errorStr.includes('not supported')) ||
35203
+ (errorStr.includes('400') && errorStr.includes('content')));
35172
35204
  }
35173
35205
  class SpatialReasoningEngine {
35174
35206
  config;
@@ -36165,10 +36197,21 @@ class TopologyDiscoveryEngine {
36165
36197
  try {
36166
36198
  const camera = systemManager.getDeviceById(cameraId);
36167
36199
  if (!camera?.interfaces?.includes(sdk_1.ScryptedInterface.Camera)) {
36200
+ this.console.warn(`[Discovery] Camera ${cameraId} doesn't have Camera interface`);
36168
36201
  return null;
36169
36202
  }
36203
+ this.console.log(`[Discovery] Taking snapshot from camera: ${camera.name || cameraId}`);
36170
36204
  const mediaObject = await camera.takePicture();
36171
- return (0, spatial_reasoning_1.mediaObjectToBase64)(mediaObject);
36205
+ if (!mediaObject) {
36206
+ this.console.warn(`[Discovery] takePicture() returned null for ${camera.name}`);
36207
+ return null;
36208
+ }
36209
+ this.console.log(`[Discovery] MediaObject received: mimeType=${mediaObject.mimeType}`);
36210
+ const imageData = await (0, spatial_reasoning_1.mediaObjectToBase64)(mediaObject);
36211
+ if (!imageData) {
36212
+ this.console.warn(`[Discovery] Failed to convert MediaObject to base64 for ${camera.name}`);
36213
+ }
36214
+ return imageData;
36172
36215
  }
36173
36216
  catch (e) {
36174
36217
  this.console.warn(`[Discovery] Failed to get snapshot from camera ${cameraId}:`, e);
@@ -36200,18 +36243,24 @@ class TopologyDiscoveryEngine {
36200
36243
  analysis.error = 'Failed to capture camera snapshot';
36201
36244
  return analysis;
36202
36245
  }
36203
- // Try with detected provider format first, then fallback to alternate format
36204
- const formatsToTry = [this.llmProviderType];
36205
- // Add fallback format
36246
+ // Try with detected provider format first, then fallback to alternates
36247
+ // The order matters: try the most likely formats first
36248
+ const formatsToTry = [];
36249
+ // Start with detected format
36250
+ formatsToTry.push(this.llmProviderType);
36251
+ // Add fallbacks based on detected provider
36206
36252
  if (this.llmProviderType === 'openai') {
36207
- formatsToTry.push('anthropic');
36253
+ formatsToTry.push('scrypted', 'anthropic');
36208
36254
  }
36209
36255
  else if (this.llmProviderType === 'anthropic') {
36210
- formatsToTry.push('openai');
36256
+ formatsToTry.push('scrypted', 'openai');
36257
+ }
36258
+ else if (this.llmProviderType === 'scrypted') {
36259
+ formatsToTry.push('anthropic', 'openai');
36211
36260
  }
36212
36261
  else {
36213
- // Unknown - try both
36214
- formatsToTry.push('openai');
36262
+ // Unknown - try all formats
36263
+ formatsToTry.push('scrypted', 'anthropic', 'openai');
36215
36264
  }
36216
36265
  let lastError = null;
36217
36266
  for (const formatType of formatsToTry) {
@@ -36290,8 +36339,8 @@ class TopologyDiscoveryEngine {
36290
36339
  catch (e) {
36291
36340
  lastError = e;
36292
36341
  // Check if this is a vision/multimodal format error
36293
- if ((0, spatial_reasoning_1.isVisionNotSupportedError)(e)) {
36294
- this.console.warn(`[Discovery] ${formatType} format not supported, trying fallback...`);
36342
+ if ((0, spatial_reasoning_1.isVisionFormatError)(e)) {
36343
+ this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
36295
36344
  continue; // Try next format
36296
36345
  }
36297
36346
  // Not a format error - don't retry
@@ -36302,8 +36351,8 @@ class TopologyDiscoveryEngine {
36302
36351
  // All formats failed
36303
36352
  if (lastError) {
36304
36353
  const errorStr = String(lastError);
36305
- if ((0, spatial_reasoning_1.isVisionNotSupportedError)(lastError)) {
36306
- analysis.error = 'Vision/image analysis not supported by configured LLM. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured.';
36354
+ if ((0, spatial_reasoning_1.isVisionFormatError)(lastError)) {
36355
+ analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
36307
36356
  }
36308
36357
  else {
36309
36358
  analysis.error = `Analysis failed: ${errorStr}`;