@blueharford/scrypted-spatial-awareness 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +101 -23
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +81 -18
- package/src/core/topology-discovery.ts +33 -8
package/dist/plugin.zip
CHANGED
|
Binary file
|
package/out/main.nodejs.js
CHANGED
|
@@ -35094,29 +35094,68 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
35094
35094
|
Object.defineProperty(exports, "__esModule", ({ value: true }));
|
|
35095
35095
|
exports.SpatialReasoningEngine = void 0;
|
|
35096
35096
|
exports.mediaObjectToBase64 = mediaObjectToBase64;
|
|
35097
|
+
exports.buildImageContent = buildImageContent;
|
|
35097
35098
|
const sdk_1 = __importStar(__webpack_require__(/*! @scrypted/sdk */ "./node_modules/@scrypted/sdk/dist/src/index.js"));
|
|
35098
35099
|
const topology_1 = __webpack_require__(/*! ../models/topology */ "./src/models/topology.ts");
|
|
35099
35100
|
const { systemManager, mediaManager } = sdk_1.default;
|
|
35100
35101
|
/**
 * Convert a MediaObject to base64 image data for vision LLM consumption.
 *
 * The image bytes are obtained through mediaManager.convertMediaObjectToBuffer
 * and encoded as raw base64 (no "data:" URL prefix).
 *
 * @param mediaObject - MediaObject from camera.takePicture()
 * @returns `{ base64, mediaType }`, or null if the conversion throws or
 *   produces no bytes. `mediaType` is the MediaObject's declared MIME type
 *   (parameters stripped, trimmed, lower-cased) when it is an `image/*` type,
 *   otherwise 'image/jpeg'.
 */
async function mediaObjectToBase64(mediaObject) {
    try {
        // Convert MediaObject to Buffer using mediaManager
        const buffer = await mediaManager.convertMediaObjectToBuffer(mediaObject, sdk_1.ScryptedMimeTypes.Image);
        // A zero-byte result is a failed conversion; do not hand an empty image to the LLM.
        if (!buffer?.length) {
            console.warn('Failed to convert MediaObject to base64:', new Error('empty image buffer'));
            return null;
        }
        // Convert buffer to base64 (raw, no data URL prefix)
        const base64 = buffer.toString('base64');
        // Determine MIME type - default to JPEG for camera images.
        // Parameters (";...") are dropped and the value is normalised so it can be
        // embedded verbatim in a data URL or a media_type field; anything that is
        // not an image/* type falls back to JPEG as well.
        const declaredType = mediaObject.mimeType?.split(';')[0].trim().toLowerCase();
        const mediaType = declaredType?.startsWith('image/') ? declaredType : 'image/jpeg';
        return { base64, mediaType };
    }
    catch (e) {
        console.warn('Failed to convert MediaObject to base64:', e);
        return null;
    }
}
|
|
35121
|
+
/**
 * Build the image content block of a multimodal ChatCompletion message.
 *
 * Two shapes are produced:
 *  - provider 'anthropic':
 *      { type: 'image', source: { type: 'base64', media_type, data } }
 *  - provider 'openai' and every other value:
 *      { type: 'image_url', image_url: { url: 'data:<mediaType>;base64,<base64>' } }
 *
 * @param imageData - Image data with `base64` (raw base64 string, no data URL
 *   prefix) and `mediaType`; a missing/empty mediaType falls back to 'image/jpeg'.
 * @param provider - The LLM provider type (openai, anthropic, or unknown)
 * @returns The provider-specific image content object.
 * @throws {TypeError} If imageData is missing or imageData.base64 is not a string.
 */
function buildImageContent(imageData, provider = 'unknown') {
    // Fail loudly instead of emitting "...;base64,undefined" to the LLM.
    if (typeof imageData?.base64 !== 'string') {
        throw new TypeError('buildImageContent: imageData.base64 must be a string');
    }
    const { base64 } = imageData;
    // Without a fallback a missing mediaType would produce "data:undefined;base64,...".
    const mediaType = imageData.mediaType || 'image/jpeg';
    if (provider === 'anthropic') {
        // Anthropic format: uses separate base64 data and media_type
        return {
            type: 'image',
            source: {
                type: 'base64',
                media_type: mediaType,
                data: base64,
            },
        };
    }
    // OpenAI format: data URL inside an image_url wrapper. Also used for unknown
    // providers, as the OpenAI format is the more commonly supported one.
    return {
        type: 'image_url',
        image_url: {
            url: `data:${mediaType};base64,${base64}`,
        },
    };
}
|
|
35120
35159
|
class SpatialReasoningEngine {
|
|
35121
35160
|
config;
|
|
35122
35161
|
console;
|
|
@@ -35336,6 +35375,7 @@ class SpatialReasoningEngine {
|
|
|
35336
35375
|
}
|
|
35337
35376
|
llmSearched = false;
|
|
35338
35377
|
llmProvider = null;
|
|
35378
|
+
llmProviderType = 'unknown';
|
|
35339
35379
|
/** Find or initialize LLM device - looks for ChatCompletion interface from @scrypted/llm plugin */
|
|
35340
35380
|
async findLlmDevice() {
|
|
35341
35381
|
if (this.llmDevice)
|
|
@@ -35354,30 +35394,39 @@ class SpatialReasoningEngine {
|
|
|
35354
35394
|
if (device.interfaces?.includes('ChatCompletion')) {
|
|
35355
35395
|
const deviceName = device.name?.toLowerCase() || '';
|
|
35356
35396
|
const pluginId = device.pluginId?.toLowerCase() || '';
|
|
35357
|
-
// Identify the provider type for logging
|
|
35397
|
+
// Identify the provider type for logging and image format selection
|
|
35358
35398
|
let providerType = 'Unknown';
|
|
35359
|
-
|
|
35360
|
-
providerType = 'Scrypted LLM';
|
|
35361
|
-
}
|
|
35399
|
+
let providerTypeEnum = 'unknown';
|
|
35362
35400
|
if (deviceName.includes('openai') || deviceName.includes('gpt')) {
|
|
35363
35401
|
providerType = 'OpenAI';
|
|
35402
|
+
providerTypeEnum = 'openai';
|
|
35364
35403
|
}
|
|
35365
35404
|
else if (deviceName.includes('anthropic') || deviceName.includes('claude')) {
|
|
35366
35405
|
providerType = 'Anthropic';
|
|
35406
|
+
providerTypeEnum = 'anthropic';
|
|
35367
35407
|
}
|
|
35368
35408
|
else if (deviceName.includes('ollama')) {
|
|
35369
35409
|
providerType = 'Ollama';
|
|
35410
|
+
providerTypeEnum = 'openai'; // Ollama uses OpenAI-compatible format
|
|
35370
35411
|
}
|
|
35371
35412
|
else if (deviceName.includes('gemini') || deviceName.includes('google')) {
|
|
35372
35413
|
providerType = 'Google';
|
|
35414
|
+
providerTypeEnum = 'openai'; // Google uses OpenAI-compatible format
|
|
35373
35415
|
}
|
|
35374
35416
|
else if (deviceName.includes('llama')) {
|
|
35375
35417
|
providerType = 'llama.cpp';
|
|
35418
|
+
providerTypeEnum = 'openai'; // llama.cpp uses OpenAI-compatible format
|
|
35419
|
+
}
|
|
35420
|
+
else if (pluginId.includes('@scrypted/llm') || pluginId.includes('llm')) {
|
|
35421
|
+
providerType = 'Scrypted LLM';
|
|
35422
|
+
providerTypeEnum = 'unknown';
|
|
35376
35423
|
}
|
|
35377
35424
|
this.llmDevice = device;
|
|
35378
35425
|
this.llmProvider = `${providerType} (${device.name})`;
|
|
35426
|
+
this.llmProviderType = providerTypeEnum;
|
|
35379
35427
|
this.console.log(`[LLM] Connected to ${providerType}: ${device.name}`);
|
|
35380
35428
|
this.console.log(`[LLM] Plugin: ${pluginId || 'N/A'}`);
|
|
35429
|
+
this.console.log(`[LLM] Image format: ${providerTypeEnum}`);
|
|
35381
35430
|
this.console.log(`[LLM] Interfaces: ${device.interfaces?.join(', ')}`);
|
|
35382
35431
|
return this.llmDevice;
|
|
35383
35432
|
}
|
|
@@ -35395,6 +35444,10 @@ class SpatialReasoningEngine {
|
|
|
35395
35444
|
getLlmProvider() {
|
|
35396
35445
|
return this.llmProvider;
|
|
35397
35446
|
}
|
|
35447
|
+
/** Get the current LLM provider type for image format selection */
|
|
35448
|
+
getLlmProviderType() {
|
|
35449
|
+
return this.llmProviderType;
|
|
35450
|
+
}
|
|
35398
35451
|
/** Check if LLM is available */
|
|
35399
35452
|
isLlmAvailable() {
|
|
35400
35453
|
return this.llmDevice !== null;
|
|
@@ -35661,7 +35714,7 @@ class SpatialReasoningEngine {
|
|
|
35661
35714
|
return null;
|
|
35662
35715
|
try {
|
|
35663
35716
|
// Convert image to base64 for vision LLM
|
|
35664
|
-
const
|
|
35717
|
+
const imageData = await mediaObjectToBase64(mediaObject);
|
|
35665
35718
|
// Retrieve relevant context for RAG
|
|
35666
35719
|
const relevantChunks = this.retrieveRelevantContext(fromCamera.deviceId, toCamera.deviceId);
|
|
35667
35720
|
// Build RAG context
|
|
@@ -35670,11 +35723,11 @@ class SpatialReasoningEngine {
|
|
|
35670
35723
|
const prompt = this.buildLlmPrompt(tracked, fromCamera, toCamera, transitTime, fromLandmarks, toLandmarks, ragContext);
|
|
35671
35724
|
// Build message content - use multimodal format if we have an image
|
|
35672
35725
|
let messageContent;
|
|
35673
|
-
if (
|
|
35674
|
-
// Vision-capable multimodal message format (
|
|
35726
|
+
if (imageData) {
|
|
35727
|
+
// Vision-capable multimodal message format (provider-specific)
|
|
35675
35728
|
messageContent = [
|
|
35676
35729
|
{ type: 'text', text: prompt },
|
|
35677
|
-
|
|
35730
|
+
buildImageContent(imageData, this.llmProviderType),
|
|
35678
35731
|
];
|
|
35679
35732
|
}
|
|
35680
35733
|
else {
|
|
@@ -35741,7 +35794,7 @@ Generate ONLY the description, nothing else:`;
|
|
|
35741
35794
|
return null;
|
|
35742
35795
|
try {
|
|
35743
35796
|
// Convert image to base64 for vision LLM
|
|
35744
|
-
const
|
|
35797
|
+
const imageData = await mediaObjectToBase64(mediaObject);
|
|
35745
35798
|
const prompt = `Analyze this security camera image. A ${objectClass} was detected.
|
|
35746
35799
|
|
|
35747
35800
|
Looking at the surroundings and environment, identify any notable landmarks or features visible that could help describe this location. Consider:
|
|
@@ -35756,11 +35809,11 @@ If you can identify a clear landmark feature, respond with ONLY a JSON object:
|
|
|
35756
35809
|
If no clear landmark is identifiable, respond with: {"name": null}`;
|
|
35757
35810
|
// Build message content - use multimodal format if we have an image
|
|
35758
35811
|
let messageContent;
|
|
35759
|
-
if (
|
|
35760
|
-
// Vision-capable multimodal message format (
|
|
35812
|
+
if (imageData) {
|
|
35813
|
+
// Vision-capable multimodal message format (provider-specific)
|
|
35761
35814
|
messageContent = [
|
|
35762
35815
|
{ type: 'text', text: prompt },
|
|
35763
|
-
|
|
35816
|
+
buildImageContent(imageData, this.llmProviderType),
|
|
35764
35817
|
];
|
|
35765
35818
|
}
|
|
35766
35819
|
else {
|
|
@@ -36000,6 +36053,7 @@ class TopologyDiscoveryEngine {
|
|
|
36000
36053
|
topology = null;
|
|
36001
36054
|
llmDevice = null;
|
|
36002
36055
|
llmSearched = false;
|
|
36056
|
+
llmProviderType = 'unknown';
|
|
36003
36057
|
// Scene analysis cache (camera ID -> analysis)
|
|
36004
36058
|
sceneCache = new Map();
|
|
36005
36059
|
// Pending suggestions for user review
|
|
@@ -36063,8 +36117,25 @@ class TopologyDiscoveryEngine {
|
|
|
36063
36117
|
if (!device)
|
|
36064
36118
|
continue;
|
|
36065
36119
|
if (device.interfaces?.includes('ChatCompletion')) {
|
|
36120
|
+
const deviceName = device.name?.toLowerCase() || '';
|
|
36121
|
+
// Detect provider type for image format selection
|
|
36122
|
+
if (deviceName.includes('openai') || deviceName.includes('gpt')) {
|
|
36123
|
+
this.llmProviderType = 'openai';
|
|
36124
|
+
}
|
|
36125
|
+
else if (deviceName.includes('anthropic') || deviceName.includes('claude')) {
|
|
36126
|
+
this.llmProviderType = 'anthropic';
|
|
36127
|
+
}
|
|
36128
|
+
else if (deviceName.includes('ollama') || deviceName.includes('gemini') ||
|
|
36129
|
+
deviceName.includes('google') || deviceName.includes('llama')) {
|
|
36130
|
+
// These providers use OpenAI-compatible format
|
|
36131
|
+
this.llmProviderType = 'openai';
|
|
36132
|
+
}
|
|
36133
|
+
else {
|
|
36134
|
+
this.llmProviderType = 'unknown';
|
|
36135
|
+
}
|
|
36066
36136
|
this.llmDevice = device;
|
|
36067
36137
|
this.console.log(`[Discovery] Connected to LLM: ${device.name}`);
|
|
36138
|
+
this.console.log(`[Discovery] Image format: ${this.llmProviderType}`);
|
|
36068
36139
|
return this.llmDevice;
|
|
36069
36140
|
}
|
|
36070
36141
|
}
|
|
@@ -36075,7 +36146,7 @@ class TopologyDiscoveryEngine {
|
|
|
36075
36146
|
}
|
|
36076
36147
|
return null;
|
|
36077
36148
|
}
|
|
36078
|
-
/** Get camera snapshot as
|
|
36149
|
+
/** Get camera snapshot as ImageData */
|
|
36079
36150
|
async getCameraSnapshot(cameraId) {
|
|
36080
36151
|
try {
|
|
36081
36152
|
const camera = systemManager.getDeviceById(cameraId);
|
|
@@ -36110,20 +36181,20 @@ class TopologyDiscoveryEngine {
|
|
|
36110
36181
|
analysis.error = 'No LLM device available';
|
|
36111
36182
|
return analysis;
|
|
36112
36183
|
}
|
|
36113
|
-
const
|
|
36114
|
-
if (!
|
|
36184
|
+
const imageData = await this.getCameraSnapshot(cameraId);
|
|
36185
|
+
if (!imageData) {
|
|
36115
36186
|
analysis.error = 'Failed to capture camera snapshot';
|
|
36116
36187
|
return analysis;
|
|
36117
36188
|
}
|
|
36118
36189
|
try {
|
|
36119
|
-
// Build multimodal message
|
|
36190
|
+
// Build multimodal message with provider-specific image format
|
|
36120
36191
|
const result = await llm.getChatCompletion({
|
|
36121
36192
|
messages: [
|
|
36122
36193
|
{
|
|
36123
36194
|
role: 'user',
|
|
36124
36195
|
content: [
|
|
36125
36196
|
{ type: 'text', text: SCENE_ANALYSIS_PROMPT },
|
|
36126
|
-
|
|
36197
|
+
(0, spatial_reasoning_1.buildImageContent)(imageData, this.llmProviderType),
|
|
36127
36198
|
],
|
|
36128
36199
|
},
|
|
36129
36200
|
],
|
|
@@ -36258,6 +36329,13 @@ class TopologyDiscoveryEngine {
|
|
|
36258
36329
|
}
|
|
36259
36330
|
this.status.camerasAnalyzed = analyses.length;
|
|
36260
36331
|
this.console.log(`[Discovery] Analyzed ${analyses.length} cameras successfully`);
|
|
36332
|
+
// Handle case where no cameras were successfully analyzed
|
|
36333
|
+
if (analyses.length === 0) {
|
|
36334
|
+
this.console.warn('[Discovery] No cameras were successfully analyzed');
|
|
36335
|
+
this.status.lastError = 'No cameras were successfully analyzed - check LLM configuration';
|
|
36336
|
+
this.status.lastScanTime = Date.now();
|
|
36337
|
+
return null;
|
|
36338
|
+
}
|
|
36261
36339
|
// Correlate if we have multiple cameras
|
|
36262
36340
|
let correlation = null;
|
|
36263
36341
|
if (analyses.length >= 2) {
|
|
@@ -36266,7 +36344,7 @@ class TopologyDiscoveryEngine {
|
|
|
36266
36344
|
this.generateSuggestionsFromCorrelation(correlation);
|
|
36267
36345
|
}
|
|
36268
36346
|
}
|
|
36269
|
-
else {
|
|
36347
|
+
else if (analyses.length === 1) {
|
|
36270
36348
|
// Single camera - generate suggestions from its analysis
|
|
36271
36349
|
this.generateSuggestionsFromAnalysis(analyses[0]);
|
|
36272
36350
|
}
|