@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blueharford/scrypted-spatial-awareness",
3
- "version": "0.5.4",
3
+ "version": "0.5.5",
4
4
  "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
5
5
  "author": "Joshua Seidel <blueharford>",
6
6
  "license": "Apache-2.0",
@@ -87,12 +87,25 @@ export async function mediaObjectToBase64(mediaObject: MediaObject): Promise<Ima
87
87
  // Convert MediaObject to Buffer using mediaManager
88
88
  const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, ScryptedMimeTypes.Image);
89
89
 
90
+ if (!buffer || buffer.length === 0) {
91
+ console.warn('Failed to convert MediaObject: empty buffer');
92
+ return null;
93
+ }
94
+
90
95
  // Convert buffer to base64 (raw, no data URL prefix)
91
96
  const base64 = buffer.toString('base64');
92
97
 
98
+ // Validate base64 - check it's not empty and looks valid
99
+ if (!base64 || base64.length < 100) {
100
+ console.warn(`Invalid base64: length=${base64?.length || 0}`);
101
+ return null;
102
+ }
103
+
93
104
  // Determine MIME type - default to JPEG for camera images
94
105
  const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
95
106
 
107
+ console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
108
+
96
109
  return { base64, mediaType };
97
110
  } catch (e) {
98
111
  console.warn('Failed to convert MediaObject to base64:', e);
@@ -101,18 +114,17 @@ export async function mediaObjectToBase64(mediaObject: MediaObject): Promise<Ima
101
114
  }
102
115
 
103
116
  /** LLM Provider type for image format selection */
104
- export type LlmProvider = 'openai' | 'anthropic' | 'unknown';
117
+ export type LlmProvider = 'openai' | 'anthropic' | 'scrypted' | 'unknown';
105
118
 
106
119
  /**
107
120
  * Build image content block for ChatCompletion API
108
- * Supports both OpenAI and Anthropic formats
121
+ * Supports OpenAI, Anthropic, and @scrypted/llm formats
109
122
  * @param imageData - Image data with base64 and media type
110
- * @param provider - The LLM provider type (openai, anthropic, or unknown)
123
+ * @param provider - The LLM provider type
111
124
  */
112
125
  export function buildImageContent(imageData: ImageData, provider: LlmProvider = 'unknown'): any {
113
126
  if (provider === 'openai') {
114
127
  // OpenAI format: uses data URL with image_url wrapper
115
- // Include detail parameter for compatibility
116
128
  return {
117
129
  type: 'image_url',
118
130
  image_url: {
@@ -121,7 +133,7 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
121
133
  },
122
134
  };
123
135
  } else if (provider === 'anthropic') {
124
- // Anthropic format: uses separate base64 data and media_type
136
+ // Anthropic official format: uses 'data' key
125
137
  return {
126
138
  type: 'image',
127
139
  source: {
@@ -130,29 +142,43 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
130
142
  data: imageData.base64,
131
143
  },
132
144
  };
145
+ } else if (provider === 'scrypted') {
146
+ // @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
147
+ return {
148
+ type: 'image',
149
+ source: {
150
+ type: 'base64',
151
+ media_type: imageData.mediaType,
152
+ base64: imageData.base64,
153
+ },
154
+ };
133
155
  } else {
134
- // Unknown provider: try Anthropic format first as it's more explicit
135
- // Some plugins may translate this to OpenAI format internally
156
+ // Unknown provider: try @scrypted/llm format first
136
157
  return {
137
158
  type: 'image',
138
159
  source: {
139
160
  type: 'base64',
140
161
  media_type: imageData.mediaType,
141
- data: imageData.base64,
162
+ base64: imageData.base64,
142
163
  },
143
164
  };
144
165
  }
145
166
  }
146
167
 
147
- /** Check if an error indicates vision/multimodal content is not supported */
148
- export function isVisionNotSupportedError(error: any): boolean {
168
+ /** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
169
+ export function isVisionFormatError(error: any): boolean {
149
170
  const errorStr = String(error);
150
171
  return (
151
172
  errorStr.includes('content.str') ||
152
173
  errorStr.includes('should be a valid string') ||
153
174
  errorStr.includes('Invalid content type') ||
154
175
  errorStr.includes('does not support vision') ||
155
- errorStr.includes('image_url') && errorStr.includes('not supported')
176
+ errorStr.includes('invalid base64') ||
177
+ errorStr.includes('Invalid base64') ||
178
+ errorStr.includes('.image.source') ||
179
+ errorStr.includes('.image_url') ||
180
+ (errorStr.includes('image_url') && errorStr.includes('not supported')) ||
181
+ (errorStr.includes('400') && errorStr.includes('content'))
156
182
  );
157
183
  }
158
184
 
@@ -30,7 +30,7 @@ import {
30
30
  Landmark,
31
31
  findCamera,
32
32
  } from '../models/topology';
33
- import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider, isVisionNotSupportedError } from './spatial-reasoning';
33
+ import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider, isVisionFormatError } from './spatial-reasoning';
34
34
 
35
35
  const { systemManager } = sdk;
36
36
 
@@ -253,17 +253,23 @@ export class TopologyDiscoveryEngine {
253
253
  return analysis;
254
254
  }
255
255
 
256
- // Try with detected provider format first, then fallback to alternate format
257
- const formatsToTry: LlmProvider[] = [this.llmProviderType];
256
+ // Try with detected provider format first, then fallback to alternates
257
+ // The order matters: try the most likely formats first
258
+ const formatsToTry: LlmProvider[] = [];
258
259
 
259
- // Add fallback format
260
+ // Start with detected format
261
+ formatsToTry.push(this.llmProviderType);
262
+
263
+ // Add fallbacks based on detected provider
260
264
  if (this.llmProviderType === 'openai') {
261
- formatsToTry.push('anthropic');
265
+ formatsToTry.push('scrypted', 'anthropic');
262
266
  } else if (this.llmProviderType === 'anthropic') {
263
- formatsToTry.push('openai');
267
+ formatsToTry.push('scrypted', 'openai');
268
+ } else if (this.llmProviderType === 'scrypted') {
269
+ formatsToTry.push('anthropic', 'openai');
264
270
  } else {
265
- // Unknown - try both
266
- formatsToTry.push('openai');
271
+ // Unknown - try all formats
272
+ formatsToTry.push('scrypted', 'anthropic', 'openai');
267
273
  }
268
274
 
269
275
  let lastError: any = null;
@@ -353,8 +359,8 @@ export class TopologyDiscoveryEngine {
353
359
  lastError = e;
354
360
 
355
361
  // Check if this is a vision/multimodal format error
356
- if (isVisionNotSupportedError(e)) {
357
- this.console.warn(`[Discovery] ${formatType} format not supported, trying fallback...`);
362
+ if (isVisionFormatError(e)) {
363
+ this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
358
364
  continue; // Try next format
359
365
  }
360
366
 
@@ -367,8 +373,8 @@ export class TopologyDiscoveryEngine {
367
373
  // All formats failed
368
374
  if (lastError) {
369
375
  const errorStr = String(lastError);
370
- if (isVisionNotSupportedError(lastError)) {
371
- analysis.error = 'Vision/image analysis not supported by configured LLM. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured.';
376
+ if (isVisionFormatError(lastError)) {
377
+ analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
372
378
  } else {
373
379
  analysis.error = `Analysis failed: ${errorStr}`;
374
380
  }