@blueharford/scrypted-spatial-awareness 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.nodejs.js +1 -1
- package/dist/main.nodejs.js.map +1 -1
- package/dist/plugin.zip +0 -0
- package/out/main.nodejs.js +76 -22
- package/out/main.nodejs.js.map +1 -1
- package/out/plugin.zip +0 -0
- package/package.json +1 -1
- package/src/core/spatial-reasoning.ts +58 -20
- package/src/core/topology-discovery.ts +20 -3
package/dist/plugin.zip
CHANGED
Binary file
package/out/main.nodejs.js
CHANGED
@@ -35120,19 +35120,41 @@ async function mediaObjectToBase64(mediaObject) {
 }
 /**
  * Build image content block for ChatCompletion API
- *
+ * Supports both OpenAI and Anthropic formats
+ * @param imageData - Image data with base64 and media type
+ * @param provider - The LLM provider type (openai, anthropic, or unknown)
  */
-function buildImageContent(imageData) {
-
-
-
-
-
-
-
-
-
+function buildImageContent(imageData, provider = 'unknown') {
+    if (provider === 'openai') {
+        // OpenAI format: uses data URL with image_url wrapper
+        return {
+            type: 'image_url',
+            image_url: {
+                url: `data:${imageData.mediaType};base64,${imageData.base64}`,
+            },
+        };
+    }
+    else if (provider === 'anthropic') {
+        // Anthropic format: uses separate base64 data and media_type
+        return {
+            type: 'image',
+            source: {
+                type: 'base64',
+                media_type: imageData.mediaType,
+                data: imageData.base64,
+            },
+        };
+    }
+    else {
+        // Unknown provider: try OpenAI format as it's more commonly supported
+        // Most LLM wrappers (including @scrypted/llm) understand the OpenAI format
+        return {
+            type: 'image_url',
+            image_url: {
+                url: `data:${imageData.mediaType};base64,${imageData.base64}`,
+            },
+        };
+    }
 }
 class SpatialReasoningEngine {
     config;
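For reference, a minimal TypeScript sketch of the two content shapes the new buildImageContent emits, reconstructed from the compiled output above; the type aliases are illustrative and not part of the package source:

// Illustrative aliases only; the package does not export these types.
type OpenAIImagePart = {
    type: 'image_url';
    image_url: { url: string };  // data URL: data:<mediaType>;base64,<payload>
};
type AnthropicImagePart = {
    type: 'image';
    source: { type: 'base64'; media_type: string; data: string };
};
type ImagePart = OpenAIImagePart | AnthropicImagePart;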
@@ -35353,6 +35375,7 @@ class SpatialReasoningEngine {
     }
     llmSearched = false;
     llmProvider = null;
+    llmProviderType = 'unknown';
     /** Find or initialize LLM device - looks for ChatCompletion interface from @scrypted/llm plugin */
     async findLlmDevice() {
         if (this.llmDevice)
@@ -35371,30 +35394,39 @@ class SpatialReasoningEngine {
             if (device.interfaces?.includes('ChatCompletion')) {
                 const deviceName = device.name?.toLowerCase() || '';
                 const pluginId = device.pluginId?.toLowerCase() || '';
-                // Identify the provider type for logging
+                // Identify the provider type for logging and image format selection
                 let providerType = 'Unknown';
-
-                    providerType = 'Scrypted LLM';
-                }
+                let providerTypeEnum = 'unknown';
                 if (deviceName.includes('openai') || deviceName.includes('gpt')) {
                     providerType = 'OpenAI';
+                    providerTypeEnum = 'openai';
                 }
                 else if (deviceName.includes('anthropic') || deviceName.includes('claude')) {
                     providerType = 'Anthropic';
+                    providerTypeEnum = 'anthropic';
                 }
                 else if (deviceName.includes('ollama')) {
                     providerType = 'Ollama';
+                    providerTypeEnum = 'openai'; // Ollama uses OpenAI-compatible format
                 }
                 else if (deviceName.includes('gemini') || deviceName.includes('google')) {
                     providerType = 'Google';
+                    providerTypeEnum = 'openai'; // Google uses OpenAI-compatible format
                 }
                 else if (deviceName.includes('llama')) {
                     providerType = 'llama.cpp';
+                    providerTypeEnum = 'openai'; // llama.cpp uses OpenAI-compatible format
+                }
+                else if (pluginId.includes('@scrypted/llm') || pluginId.includes('llm')) {
+                    providerType = 'Scrypted LLM';
+                    providerTypeEnum = 'unknown';
                 }
                 this.llmDevice = device;
                 this.llmProvider = `${providerType} (${device.name})`;
+                this.llmProviderType = providerTypeEnum;
                 this.console.log(`[LLM] Connected to ${providerType}: ${device.name}`);
                 this.console.log(`[LLM] Plugin: ${pluginId || 'N/A'}`);
+                this.console.log(`[LLM] Image format: ${providerTypeEnum}`);
                 this.console.log(`[LLM] Interfaces: ${device.interfaces?.join(', ')}`);
                 return this.llmDevice;
             }
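The detection above is a substring heuristic over the lowercased device name, with pluginId as a final fallback. A standalone sketch of the same mapping, assuming a hypothetical helper name (the plugin inlines this logic rather than exporting it):

type ImageFormat = 'openai' | 'anthropic' | 'unknown';

// Hypothetical helper mirroring the inlined heuristic above.
function detectImageFormat(deviceName: string): ImageFormat {
    const name = deviceName.toLowerCase();
    if (name.includes('openai') || name.includes('gpt'))
        return 'openai';
    if (name.includes('anthropic') || name.includes('claude'))
        return 'anthropic';
    // Ollama, Google/Gemini, and llama.cpp accept the OpenAI-compatible format.
    if (['ollama', 'gemini', 'google', 'llama'].some(s => name.includes(s)))
        return 'openai';
    return 'unknown';  // e.g. a generic @scrypted/llm ChatCompletion device
}

Note that in the original, 'ollama' is tested before 'llama' so the display label resolves to Ollama rather than llama.cpp; both map to the OpenAI-compatible image format either way.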
@@ -35412,6 +35444,10 @@ class SpatialReasoningEngine {
     getLlmProvider() {
         return this.llmProvider;
     }
+    /** Get the current LLM provider type for image format selection */
+    getLlmProviderType() {
+        return this.llmProviderType;
+    }
     /** Check if LLM is available */
     isLlmAvailable() {
         return this.llmDevice !== null;
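A hypothetical external caller would thread the accessor's result into buildImageContent, mirroring what the engine does internally in the hunks below:

// Hypothetical usage; inside the class the diff passes this.llmProviderType directly.
const format = engine.getLlmProviderType();  // 'openai' | 'anthropic' | 'unknown'
const imagePart = buildImageContent(imageData, format);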
@@ -35688,10 +35724,10 @@ class SpatialReasoningEngine {
         // Build message content - use multimodal format if we have an image
         let messageContent;
         if (imageData) {
-            // Vision-capable multimodal message format (
+            // Vision-capable multimodal message format (provider-specific)
             messageContent = [
                 { type: 'text', text: prompt },
-                buildImageContent(imageData),
+                buildImageContent(imageData, this.llmProviderType),
             ];
         }
         else {
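With an OpenAI-type provider and a JPEG frame, messageContent resolves to something like the following (base64 payload abbreviated, values illustrative):

const messageContent = [
    { type: 'text', text: prompt },
    {
        type: 'image_url',
        image_url: { url: 'data:image/jpeg;base64,/9j/4AAQ...' },
    },
];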
@@ -35774,10 +35810,10 @@ If no clear landmark is identifiable, respond with: {"name": null}`;
         // Build message content - use multimodal format if we have an image
         let messageContent;
         if (imageData) {
-            // Vision-capable multimodal message format (
+            // Vision-capable multimodal message format (provider-specific)
             messageContent = [
                 { type: 'text', text: prompt },
-                buildImageContent(imageData),
+                buildImageContent(imageData, this.llmProviderType),
             ];
         }
         else {
@@ -36017,6 +36053,7 @@ class TopologyDiscoveryEngine {
     topology = null;
     llmDevice = null;
     llmSearched = false;
+    llmProviderType = 'unknown';
     // Scene analysis cache (camera ID -> analysis)
     sceneCache = new Map();
     // Pending suggestions for user review
@@ -36080,8 +36117,25 @@ class TopologyDiscoveryEngine {
             if (!device)
                 continue;
             if (device.interfaces?.includes('ChatCompletion')) {
+                const deviceName = device.name?.toLowerCase() || '';
+                // Detect provider type for image format selection
+                if (deviceName.includes('openai') || deviceName.includes('gpt')) {
+                    this.llmProviderType = 'openai';
+                }
+                else if (deviceName.includes('anthropic') || deviceName.includes('claude')) {
+                    this.llmProviderType = 'anthropic';
+                }
+                else if (deviceName.includes('ollama') || deviceName.includes('gemini') ||
+                    deviceName.includes('google') || deviceName.includes('llama')) {
+                    // These providers use OpenAI-compatible format
+                    this.llmProviderType = 'openai';
+                }
+                else {
+                    this.llmProviderType = 'unknown';
+                }
                 this.llmDevice = device;
                 this.console.log(`[Discovery] Connected to LLM: ${device.name}`);
+                this.console.log(`[Discovery] Image format: ${this.llmProviderType}`);
                 return this.llmDevice;
             }
         }
@@ -36133,14 +36187,14 @@ class TopologyDiscoveryEngine {
             return analysis;
         }
         try {
-            // Build multimodal message with
+            // Build multimodal message with provider-specific image format
             const result = await llm.getChatCompletion({
                 messages: [
                     {
                         role: 'user',
                         content: [
                             { type: 'text', text: SCENE_ANALYSIS_PROMPT },
-                            (0, spatial_reasoning_1.buildImageContent)(imageData),
+                            (0, spatial_reasoning_1.buildImageContent)(imageData, this.llmProviderType),
                         ],
                     },
                 ],
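For an Anthropic-type provider, the same getChatCompletion call would carry the image block in Anthropic's source form instead; a sketch with illustrative values, assuming the shapes shown in buildImageContent above:

await llm.getChatCompletion({
    messages: [{
        role: 'user',
        content: [
            { type: 'text', text: SCENE_ANALYSIS_PROMPT },
            {
                type: 'image',
                source: { type: 'base64', media_type: 'image/jpeg', data: '/9j/4AAQ...' },
            },
        ],
    }],
});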