@blueharford/scrypted-spatial-awareness 0.5.4 → 0.5.6

This diff shows the changes between two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blueharford/scrypted-spatial-awareness",
3
- "version": "0.5.4",
3
+ "version": "0.5.6",
4
4
  "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
5
5
  "author": "Joshua Seidel <blueharford>",
6
6
  "license": "Apache-2.0",
@@ -84,35 +84,58 @@ export interface ImageData {
84
84
  */
85
85
  export async function mediaObjectToBase64(mediaObject: MediaObject): Promise<ImageData | null> {
86
86
  try {
87
+ console.log(`[Image] Converting MediaObject, mimeType=${mediaObject?.mimeType}`);
88
+
87
89
  // Convert MediaObject to Buffer using mediaManager
88
90
  const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, ScryptedMimeTypes.Image);
89
91
 
92
+ if (!buffer) {
93
+ console.warn('[Image] convertMediaObjectToBuffer returned null/undefined');
94
+ return null;
95
+ }
96
+
97
+ console.log(`[Image] Buffer received: ${buffer.length} bytes`);
98
+
99
+ if (buffer.length === 0) {
100
+ console.warn('[Image] Buffer is empty (0 bytes)');
101
+ return null;
102
+ }
103
+
104
+ // Check if buffer is too small to be a valid image (< 1KB is suspicious)
105
+ if (buffer.length < 1000) {
106
+ // Log what the buffer contains - might be an error message
107
+ const bufferContent = buffer.toString('utf8').substring(0, 100);
108
+ console.warn(`[Image] Buffer too small (${buffer.length} bytes), content: ${bufferContent}`);
109
+ return null;
110
+ }
111
+
90
112
  // Convert buffer to base64 (raw, no data URL prefix)
91
113
  const base64 = buffer.toString('base64');
92
114
 
93
115
  // Determine MIME type - default to JPEG for camera images
94
116
  const mediaType = mediaObject.mimeType?.split(';')[0] || 'image/jpeg';
95
117
 
118
+ console.log(`[Image] Converted to base64: ${base64.length} chars, type=${mediaType}`);
119
+
96
120
  return { base64, mediaType };
97
121
  } catch (e) {
98
- console.warn('Failed to convert MediaObject to base64:', e);
122
+ console.warn('[Image] Failed to convert MediaObject to base64:', e);
99
123
  return null;
100
124
  }
101
125
  }
102
126
 
103
127
  /** LLM Provider type for image format selection */
104
- export type LlmProvider = 'openai' | 'anthropic' | 'unknown';
128
+ export type LlmProvider = 'openai' | 'anthropic' | 'scrypted' | 'unknown';
105
129
 
106
130
  /**
107
131
  * Build image content block for ChatCompletion API
108
- * Supports both OpenAI and Anthropic formats
132
+ * Supports OpenAI, Anthropic, and @scrypted/llm formats
109
133
  * @param imageData - Image data with base64 and media type
110
- * @param provider - The LLM provider type (openai, anthropic, or unknown)
134
+ * @param provider - The LLM provider type
111
135
  */
112
136
  export function buildImageContent(imageData: ImageData, provider: LlmProvider = 'unknown'): any {
113
137
  if (provider === 'openai') {
114
138
  // OpenAI format: uses data URL with image_url wrapper
115
- // Include detail parameter for compatibility
116
139
  return {
117
140
  type: 'image_url',
118
141
  image_url: {
@@ -121,7 +144,7 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
121
144
  },
122
145
  };
123
146
  } else if (provider === 'anthropic') {
124
- // Anthropic format: uses separate base64 data and media_type
147
+ // Anthropic official format: uses 'data' key
125
148
  return {
126
149
  type: 'image',
127
150
  source: {
@@ -130,29 +153,43 @@ export function buildImageContent(imageData: ImageData, provider: LlmProvider =
130
153
  data: imageData.base64,
131
154
  },
132
155
  };
156
+ } else if (provider === 'scrypted') {
157
+ // @scrypted/llm format: uses 'base64' key (per error path .image.source.base64)
158
+ return {
159
+ type: 'image',
160
+ source: {
161
+ type: 'base64',
162
+ media_type: imageData.mediaType,
163
+ base64: imageData.base64,
164
+ },
165
+ };
133
166
  } else {
134
- // Unknown provider: try Anthropic format first as it's more explicit
135
- // Some plugins may translate this to OpenAI format internally
167
+ // Unknown provider: try @scrypted/llm format first
136
168
  return {
137
169
  type: 'image',
138
170
  source: {
139
171
  type: 'base64',
140
172
  media_type: imageData.mediaType,
141
- data: imageData.base64,
173
+ base64: imageData.base64,
142
174
  },
143
175
  };
144
176
  }
145
177
  }
146
178
 
147
- /** Check if an error indicates vision/multimodal content is not supported */
148
- export function isVisionNotSupportedError(error: any): boolean {
179
+ /** Check if an error indicates vision/multimodal content format issue (should try alternate format) */
180
+ export function isVisionFormatError(error: any): boolean {
149
181
  const errorStr = String(error);
150
182
  return (
151
183
  errorStr.includes('content.str') ||
152
184
  errorStr.includes('should be a valid string') ||
153
185
  errorStr.includes('Invalid content type') ||
154
186
  errorStr.includes('does not support vision') ||
155
- errorStr.includes('image_url') && errorStr.includes('not supported')
187
+ errorStr.includes('invalid base64') ||
188
+ errorStr.includes('Invalid base64') ||
189
+ errorStr.includes('.image.source') ||
190
+ errorStr.includes('.image_url') ||
191
+ (errorStr.includes('image_url') && errorStr.includes('not supported')) ||
192
+ (errorStr.includes('400') && errorStr.includes('content'))
156
193
  );
157
194
  }
158
195
 
@@ -30,7 +30,7 @@ import {
30
30
  Landmark,
31
31
  findCamera,
32
32
  } from '../models/topology';
33
- import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider, isVisionNotSupportedError } from './spatial-reasoning';
33
+ import { mediaObjectToBase64, buildImageContent, ImageData, LlmProvider, isVisionFormatError } from './spatial-reasoning';
34
34
 
35
35
  const { systemManager } = sdk;
36
36
 
@@ -213,11 +213,27 @@ export class TopologyDiscoveryEngine {
213
213
  try {
214
214
  const camera = systemManager.getDeviceById<Camera>(cameraId);
215
215
  if (!camera?.interfaces?.includes(ScryptedInterface.Camera)) {
216
+ this.console.warn(`[Discovery] Camera ${cameraId} doesn't have Camera interface`);
216
217
  return null;
217
218
  }
218
219
 
220
+ this.console.log(`[Discovery] Taking snapshot from camera: ${camera.name || cameraId}`);
219
221
  const mediaObject = await camera.takePicture();
220
- return mediaObjectToBase64(mediaObject);
222
+
223
+ if (!mediaObject) {
224
+ this.console.warn(`[Discovery] takePicture() returned null for ${camera.name}`);
225
+ return null;
226
+ }
227
+
228
+ this.console.log(`[Discovery] MediaObject received: mimeType=${mediaObject.mimeType}`);
229
+
230
+ const imageData = await mediaObjectToBase64(mediaObject);
231
+
232
+ if (!imageData) {
233
+ this.console.warn(`[Discovery] Failed to convert MediaObject to base64 for ${camera.name}`);
234
+ }
235
+
236
+ return imageData;
221
237
  } catch (e) {
222
238
  this.console.warn(`[Discovery] Failed to get snapshot from camera ${cameraId}:`, e);
223
239
  return null;
@@ -253,17 +269,23 @@ export class TopologyDiscoveryEngine {
253
269
  return analysis;
254
270
  }
255
271
 
256
- // Try with detected provider format first, then fallback to alternate format
257
- const formatsToTry: LlmProvider[] = [this.llmProviderType];
272
+ // Try with detected provider format first, then fallback to alternates
273
+ // The order matters: try the most likely formats first
274
+ const formatsToTry: LlmProvider[] = [];
275
+
276
+ // Start with detected format
277
+ formatsToTry.push(this.llmProviderType);
258
278
 
259
- // Add fallback format
279
+ // Add fallbacks based on detected provider
260
280
  if (this.llmProviderType === 'openai') {
261
- formatsToTry.push('anthropic');
281
+ formatsToTry.push('scrypted', 'anthropic');
262
282
  } else if (this.llmProviderType === 'anthropic') {
263
- formatsToTry.push('openai');
283
+ formatsToTry.push('scrypted', 'openai');
284
+ } else if (this.llmProviderType === 'scrypted') {
285
+ formatsToTry.push('anthropic', 'openai');
264
286
  } else {
265
- // Unknown - try both
266
- formatsToTry.push('openai');
287
+ // Unknown - try all formats
288
+ formatsToTry.push('scrypted', 'anthropic', 'openai');
267
289
  }
268
290
 
269
291
  let lastError: any = null;
@@ -353,8 +375,8 @@ export class TopologyDiscoveryEngine {
353
375
  lastError = e;
354
376
 
355
377
  // Check if this is a vision/multimodal format error
356
- if (isVisionNotSupportedError(e)) {
357
- this.console.warn(`[Discovery] ${formatType} format not supported, trying fallback...`);
378
+ if (isVisionFormatError(e)) {
379
+ this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
358
380
  continue; // Try next format
359
381
  }
360
382
 
@@ -367,8 +389,8 @@ export class TopologyDiscoveryEngine {
367
389
  // All formats failed
368
390
  if (lastError) {
369
391
  const errorStr = String(lastError);
370
- if (isVisionNotSupportedError(lastError)) {
371
- analysis.error = 'Vision/image analysis not supported by configured LLM. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured.';
392
+ if (isVisionFormatError(lastError)) {
393
+ analysis.error = 'Vision/image analysis failed with all formats. Ensure you have a vision-capable model (e.g., gpt-4o, gpt-4-turbo, claude-3-sonnet) configured and the @scrypted/llm plugin supports vision.';
372
394
  } else {
373
395
  analysis.error = `Analysis failed: ${errorStr}`;
374
396
  }