@blueharford/scrypted-spatial-awareness 0.6.33 → 0.6.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.zip CHANGED
Binary file
@@ -36597,6 +36597,12 @@ class TopologyDiscoveryEngine {
36597
36597
  getStatus() {
36598
36598
  return { ...this.status };
36599
36599
  }
36600
+ /** Get list of LLMs excluded for lack of vision support */
36601
+ getExcludedVisionLlmNames() {
36602
+ return this.llmDevices
36603
+ .filter(l => !l.visionCapable)
36604
+ .map(l => l.name || l.id);
36605
+ }
36600
36606
  /** Get pending suggestions */
36601
36607
  getPendingSuggestions() {
36602
36608
  return Array.from(this.suggestions.values())
@@ -36648,6 +36654,7 @@ class TopologyDiscoveryEngine {
36648
36654
  providerType,
36649
36655
  lastUsed: 0,
36650
36656
  errorCount: 0,
36657
+ visionCapable: true,
36651
36658
  });
36652
36659
  this.console.log(`[Discovery] Found LLM: ${device.name}`);
36653
36660
  }
@@ -36694,6 +36701,42 @@ class TopologyDiscoveryEngine {
36694
36701
  this.console.log(`[Discovery] Selected LLM: ${selected.name}`);
36695
36702
  return selected.device;
36696
36703
  }
36704
+ /** Select an LLM device, excluding any IDs if provided */
36705
+ async selectLlmDevice(excludeIds) {
36706
+ await this.findAllLlmDevices();
36707
+ if (this.llmDevices.length === 0)
36708
+ return null;
36709
+ let bestIndex = -1;
36710
+ let bestScore = Infinity;
36711
+ for (let i = 0; i < this.llmDevices.length; i++) {
36712
+ const llm = this.llmDevices[i];
36713
+ if (excludeIds.has(llm.id))
36714
+ continue;
36715
+ if (!llm.visionCapable)
36716
+ continue;
36717
+ const score = llm.lastUsed + (llm.errorCount * 60000);
36718
+ if (score < bestScore) {
36719
+ bestScore = score;
36720
+ bestIndex = i;
36721
+ }
36722
+ }
36723
+ if (bestIndex === -1)
36724
+ return null;
36725
+ const selected = this.llmDevices[bestIndex];
36726
+ this.llmDevice = selected.device;
36727
+ this.llmProviderType = selected.providerType;
36728
+ selected.lastUsed = Date.now();
36729
+ this.console.log(`[Discovery] Selected LLM: ${selected.name}`);
36730
+ return selected.device;
36731
+ }
36732
+ isRetryableLlmError(error) {
36733
+ const errorStr = String(error).toLowerCase();
36734
+ return (errorStr.includes('404') ||
36735
+ errorStr.includes('not found') ||
36736
+ errorStr.includes('no such model') ||
36737
+ errorStr.includes('model not found') ||
36738
+ errorStr.includes('endpoint'));
36739
+ }
36697
36740
  /** Mark an LLM as having an error */
36698
36741
  markLlmError(device) {
36699
36742
  const llm = this.llmDevices.find(l => l.device === device);
@@ -36743,45 +36786,51 @@ class TopologyDiscoveryEngine {
36743
36786
  potentialOverlaps: [],
36744
36787
  isValid: false,
36745
36788
  };
36746
- const llm = await this.findLlmDevice();
36747
- if (!llm?.getChatCompletion) {
36748
- analysis.error = 'No LLM device available';
36749
- return analysis;
36750
- }
36751
36789
  const imageData = await this.getCameraSnapshot(cameraId);
36752
36790
  if (!imageData) {
36753
36791
  analysis.error = 'Failed to capture camera snapshot';
36754
36792
  return analysis;
36755
36793
  }
36756
- // Try with detected provider format first, then fallback to alternates
36757
- // The order matters: try the most likely formats first
36758
- const formatsToTry = [];
36759
- // Start with detected format
36760
- formatsToTry.push(this.llmProviderType);
36761
- // Add fallbacks based on detected provider
36762
- if (this.llmProviderType === 'openai') {
36763
- formatsToTry.push('scrypted', 'anthropic');
36764
- }
36765
- else if (this.llmProviderType === 'anthropic') {
36766
- formatsToTry.push('scrypted', 'openai');
36767
- }
36768
- else if (this.llmProviderType === 'scrypted') {
36769
- formatsToTry.push('anthropic', 'openai');
36770
- }
36771
- else {
36772
- // Unknown - try all formats
36773
- formatsToTry.push('scrypted', 'anthropic', 'openai');
36774
- }
36794
+ await this.findAllLlmDevices();
36795
+ const excludeIds = new Set();
36775
36796
  let lastError = null;
36776
- for (const formatType of formatsToTry) {
36777
- try {
36778
- this.console.log(`[Discovery] Trying ${formatType} image format for ${cameraName}...`);
36779
- // Build prompt with camera context (height)
36780
- const cameraNode = this.topology ? (0, topology_1.findCamera)(this.topology, cameraId) : null;
36781
- const mountHeight = cameraNode?.context?.mountHeight || 8;
36782
- const cameraRange = cameraNode?.fov?.range || 80;
36783
- // Add camera-specific context to the prompt
36784
- const contextPrefix = `CAMERA INFORMATION:
36797
+ const maxAttempts = Math.max(1, this.llmDevices.length || 1);
36798
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
36799
+ const llm = await this.selectLlmDevice(excludeIds);
36800
+ if (!llm?.getChatCompletion) {
36801
+ analysis.error = 'No LLM device available';
36802
+ return analysis;
36803
+ }
36804
+ let allFormatsVisionError = false;
36805
+ // Try with detected provider format first, then fallback to alternates
36806
+ // The order matters: try the most likely formats first
36807
+ const formatsToTry = [];
36808
+ // Start with detected format
36809
+ formatsToTry.push(this.llmProviderType);
36810
+ // Add fallbacks based on detected provider
36811
+ if (this.llmProviderType === 'openai') {
36812
+ formatsToTry.push('scrypted', 'anthropic');
36813
+ }
36814
+ else if (this.llmProviderType === 'anthropic') {
36815
+ formatsToTry.push('scrypted', 'openai');
36816
+ }
36817
+ else if (this.llmProviderType === 'scrypted') {
36818
+ formatsToTry.push('anthropic', 'openai');
36819
+ }
36820
+ else {
36821
+ // Unknown - try all formats
36822
+ formatsToTry.push('scrypted', 'anthropic', 'openai');
36823
+ }
36824
+ let visionFormatFailures = 0;
36825
+ for (const formatType of formatsToTry) {
36826
+ try {
36827
+ this.console.log(`[Discovery] Trying ${formatType} image format for ${cameraName}...`);
36828
+ // Build prompt with camera context (height)
36829
+ const cameraNode = this.topology ? (0, topology_1.findCamera)(this.topology, cameraId) : null;
36830
+ const mountHeight = cameraNode?.context?.mountHeight || 8;
36831
+ const cameraRange = cameraNode?.fov?.range || 80;
36832
+ // Add camera-specific context to the prompt
36833
+ const contextPrefix = `CAMERA INFORMATION:
36785
36834
  - Camera Name: ${cameraName}
36786
36835
  - Mount Height: ${mountHeight} feet above ground
36787
36836
  - Approximate viewing range: ${cameraRange} feet
@@ -36789,96 +36838,122 @@ class TopologyDiscoveryEngine {
36789
36838
  Use the mount height to help estimate distances - objects at ground level will appear at different angles depending on distance from a camera mounted at ${mountHeight} feet.
36790
36839
 
36791
36840
  `;
36792
- // Build multimodal message with provider-specific image format
36793
- const result = await llm.getChatCompletion({
36794
- messages: [
36795
- {
36796
- role: 'user',
36797
- content: [
36798
- { type: 'text', text: contextPrefix + SCENE_ANALYSIS_PROMPT },
36799
- (0, spatial_reasoning_1.buildImageContent)(imageData, formatType),
36800
- ],
36801
- },
36802
- ],
36803
- max_tokens: 4000, // Increased for detailed scene analysis
36804
- temperature: 0.3,
36805
- });
36806
- const content = result?.choices?.[0]?.message?.content;
36807
- if (content && typeof content === 'string') {
36808
- try {
36809
- // Extract JSON from response (handle markdown code blocks)
36810
- let jsonStr = content.trim();
36811
- if (jsonStr.startsWith('```')) {
36812
- jsonStr = jsonStr.replace(/```json?\n?/g, '').replace(/```$/g, '').trim();
36813
- }
36814
- // Try to recover truncated JSON
36815
- const parsed = this.parseJsonWithRecovery(jsonStr, cameraName);
36816
- // Map parsed data to our types
36817
- if (Array.isArray(parsed.landmarks)) {
36818
- analysis.landmarks = parsed.landmarks.map((l) => ({
36819
- name: l.name || 'Unknown',
36820
- type: this.mapLandmarkType(l.type),
36821
- confidence: typeof l.confidence === 'number' ? l.confidence : 0.7,
36822
- distance: this.mapDistance(l.distance),
36823
- description: l.description || '',
36824
- boundingBox: l.boundingBox,
36825
- }));
36826
- }
36827
- if (Array.isArray(parsed.zones)) {
36828
- analysis.zones = parsed.zones.map((z) => ({
36829
- name: z.name || 'Unknown',
36830
- type: this.mapZoneType(z.type),
36831
- coverage: typeof z.coverage === 'number' ? z.coverage : 0.5,
36832
- description: z.description || '',
36833
- boundingBox: z.boundingBox,
36834
- distance: this.mapDistance(z.distance), // Parse distance for zones too
36835
- }));
36836
- }
36837
- if (parsed.edges && typeof parsed.edges === 'object') {
36838
- analysis.edges = {
36839
- top: parsed.edges.top || '',
36840
- left: parsed.edges.left || '',
36841
- right: parsed.edges.right || '',
36842
- bottom: parsed.edges.bottom || '',
36843
- };
36844
- }
36845
- if (parsed.orientation) {
36846
- analysis.orientation = this.mapOrientation(parsed.orientation);
36841
+ // Build multimodal message with provider-specific image format
36842
+ const result = await llm.getChatCompletion({
36843
+ messages: [
36844
+ {
36845
+ role: 'user',
36846
+ content: [
36847
+ { type: 'text', text: contextPrefix + SCENE_ANALYSIS_PROMPT },
36848
+ (0, spatial_reasoning_1.buildImageContent)(imageData, formatType),
36849
+ ],
36850
+ },
36851
+ ],
36852
+ max_tokens: 4000, // Increased for detailed scene analysis
36853
+ temperature: 0.3,
36854
+ });
36855
+ const content = result?.choices?.[0]?.message?.content;
36856
+ if (content && typeof content === 'string') {
36857
+ try {
36858
+ // Extract JSON from response (handle markdown code blocks)
36859
+ let jsonStr = content.trim();
36860
+ if (jsonStr.startsWith('```')) {
36861
+ jsonStr = jsonStr.replace(/```json?\n?/g, '').replace(/```$/g, '').trim();
36862
+ }
36863
+ // Try to recover truncated JSON
36864
+ const parsed = this.parseJsonWithRecovery(jsonStr, cameraName);
36865
+ // Map parsed data to our types
36866
+ if (Array.isArray(parsed.landmarks)) {
36867
+ analysis.landmarks = parsed.landmarks.map((l) => ({
36868
+ name: l.name || 'Unknown',
36869
+ type: this.mapLandmarkType(l.type),
36870
+ confidence: typeof l.confidence === 'number' ? l.confidence : 0.7,
36871
+ distance: this.mapDistance(l.distance),
36872
+ description: l.description || '',
36873
+ boundingBox: l.boundingBox,
36874
+ }));
36875
+ }
36876
+ if (Array.isArray(parsed.zones)) {
36877
+ analysis.zones = parsed.zones.map((z) => ({
36878
+ name: z.name || 'Unknown',
36879
+ type: this.mapZoneType(z.type),
36880
+ coverage: typeof z.coverage === 'number' ? z.coverage : 0.5,
36881
+ description: z.description || '',
36882
+ boundingBox: z.boundingBox,
36883
+ distance: this.mapDistance(z.distance), // Parse distance for zones too
36884
+ }));
36885
+ }
36886
+ if (parsed.edges && typeof parsed.edges === 'object') {
36887
+ analysis.edges = {
36888
+ top: parsed.edges.top || '',
36889
+ left: parsed.edges.left || '',
36890
+ right: parsed.edges.right || '',
36891
+ bottom: parsed.edges.bottom || '',
36892
+ };
36893
+ }
36894
+ if (parsed.orientation) {
36895
+ analysis.orientation = this.mapOrientation(parsed.orientation);
36896
+ }
36897
+ analysis.isValid = true;
36898
+ this.console.log(`[Discovery] Analyzed ${cameraName}: ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones (using ${formatType} format)`);
36899
+ // Update the preferred format for future requests
36900
+ if (formatType !== this.llmProviderType) {
36901
+ this.console.log(`[Discovery] Switching to ${formatType} format for future requests`);
36902
+ this.llmProviderType = formatType;
36903
+ }
36904
+ // Success - exit the retry loop
36905
+ return analysis;
36847
36906
  }
36848
- analysis.isValid = true;
36849
- this.console.log(`[Discovery] Analyzed ${cameraName}: ${analysis.landmarks.length} landmarks, ${analysis.zones.length} zones (using ${formatType} format)`);
36850
- // Update the preferred format for future requests
36851
- if (formatType !== this.llmProviderType) {
36852
- this.console.log(`[Discovery] Switching to ${formatType} format for future requests`);
36853
- this.llmProviderType = formatType;
36907
+ catch (parseError) {
36908
+ this.console.warn(`[Discovery] Failed to parse LLM response for ${cameraName}:`, parseError);
36909
+ analysis.error = 'Failed to parse LLM response';
36910
+ return analysis;
36854
36911
  }
36855
- // Success - exit the retry loop
36856
- return analysis;
36857
36912
  }
36858
- catch (parseError) {
36859
- this.console.warn(`[Discovery] Failed to parse LLM response for ${cameraName}:`, parseError);
36860
- analysis.error = 'Failed to parse LLM response';
36861
- return analysis;
36913
+ }
36914
+ catch (e) {
36915
+ lastError = e;
36916
+ // Check if this is a vision/multimodal format error
36917
+ if ((0, spatial_reasoning_1.isVisionFormatError)(e)) {
36918
+ this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
36919
+ visionFormatFailures++;
36920
+ continue; // Try next format
36862
36921
  }
36922
+ // Retry with a different LLM if error indicates bad endpoint/model
36923
+ if (this.isRetryableLlmError(e)) {
36924
+ this.console.warn(`[Discovery] LLM error for ${cameraName}, trying another provider...`);
36925
+ this.markLlmError(llm);
36926
+ const llmEntry = this.llmDevices.find(d => d.device === llm);
36927
+ if (llmEntry) {
36928
+ excludeIds.add(llmEntry.id);
36929
+ }
36930
+ break;
36931
+ }
36932
+ // Not a format error - don't retry
36933
+ this.console.warn(`[Discovery] Scene analysis failed for ${cameraName}:`, e);
36934
+ break;
36863
36935
  }
36864
36936
  }
36865
- catch (e) {
36866
- lastError = e;
36867
- // Check if this is a vision/multimodal format error
36868
- if ((0, spatial_reasoning_1.isVisionFormatError)(e)) {
36869
- this.console.warn(`[Discovery] ${formatType} format failed, trying fallback...`);
36870
- continue; // Try next format
36871
- }
36872
- // Not a format error - don't retry
36873
- this.console.warn(`[Discovery] Scene analysis failed for ${cameraName}:`, e);
36874
- break;
36937
+ allFormatsVisionError = visionFormatFailures > 0 && visionFormatFailures === formatsToTry.length;
36938
+ if (allFormatsVisionError) {
36939
+ const llmEntry = this.llmDevices.find(d => d.device === llm);
36940
+ if (llmEntry) {
36941
+ llmEntry.visionCapable = false;
36942
+ excludeIds.add(llmEntry.id);
36943
+ this.console.warn(`[Discovery] ${llmEntry.name} does not support vision. Excluding from discovery.`);
36944
+ }
36875
36945
  }
36876
36946
  }
36877
36947
  // All formats failed
36878
36948
  if (lastError) {
36879
36949
  // Track error for load balancing
36880
- if (llm) {
36881
- this.markLlmError(llm);
36950
+ // Note: llm may be null here if no device was available
36951
+ if (lastError && !this.isRetryableLlmError(lastError)) {
36952
+ // Best-effort error accounting for the most recent device
36953
+ const lastDevice = this.llmDevice;
36954
+ if (lastDevice) {
36955
+ this.markLlmError(lastDevice);
36956
+ }
36882
36957
  }
36883
36958
  const errorStr = String(lastError);
36884
36959
  if ((0, spatial_reasoning_1.isVisionFormatError)(lastError)) {
@@ -40166,6 +40241,19 @@ class SpatialAwarenessPlugin extends sdk_1.ScryptedDeviceBase {
40166
40241
  addGroup('AI & Spatial Reasoning');
40167
40242
  // ==================== 8. Auto-Topology Discovery ====================
40168
40243
  addGroup('Auto-Topology Discovery');
40244
+ if (this.discoveryEngine) {
40245
+ const excluded = this.discoveryEngine.getExcludedVisionLlmNames();
40246
+ if (excluded.length > 0) {
40247
+ settings.push({
40248
+ key: 'excludedVisionLlms',
40249
+ title: 'Excluded LLMs (No Vision)',
40250
+ type: 'string',
40251
+ readonly: true,
40252
+ value: excluded.join(', '),
40253
+ group: 'Auto-Topology Discovery',
40254
+ });
40255
+ }
40256
+ }
40169
40257
  // ==================== 9. MQTT Integration ====================
40170
40258
  addGroup('MQTT Integration');
40171
40259
  return settings;
@@ -40179,6 +40267,7 @@ class SpatialAwarenessPlugin extends sdk_1.ScryptedDeviceBase {
40179
40267
  key === 'lostTimeout' ||
40180
40268
  key === 'useVisualMatching' ||
40181
40269
  key === 'loiteringThreshold' ||
40270
+ key === 'minDetectionScore' ||
40182
40271
  key === 'objectAlertCooldown' ||
40183
40272
  key === 'useLlmDescriptions' ||
40184
40273
  key === 'llmDebounceInterval' ||
@@ -40291,7 +40380,7 @@ class SpatialAwarenessPlugin extends sdk_1.ScryptedDeviceBase {
40291
40380
  }
40292
40381
  // Training Mode endpoints
40293
40382
  if (path.endsWith('/api/training/start')) {
40294
- return this.handleTrainingStartRequest(request, response);
40383
+ return await this.handleTrainingStartRequest(request, response);
40295
40384
  }
40296
40385
  if (path.endsWith('/api/training/pause')) {
40297
40386
  return this.handleTrainingPauseRequest(response);
@@ -40861,13 +40950,23 @@ class SpatialAwarenessPlugin extends sdk_1.ScryptedDeviceBase {
40861
40950
  }
40862
40951
  }
40863
40952
  // ==================== Training Mode Handlers ====================
40864
- handleTrainingStartRequest(request, response) {
40953
+ async handleTrainingStartRequest(request, response) {
40865
40954
  if (!this.trackingEngine) {
40866
- response.send(JSON.stringify({ error: 'Tracking engine not running. Configure topology first.' }), {
40867
- code: 500,
40868
- headers: { 'Content-Type': 'application/json' },
40869
- });
40870
- return;
40955
+ const topologyJson = this.storage.getItem('topology');
40956
+ const topology = topologyJson ? JSON.parse(topologyJson) : (0, topology_1.createEmptyTopology)();
40957
+ if (!topology.cameras?.length) {
40958
+ const cameras = this.buildTopologyCamerasFromSettings();
40959
+ if (cameras.length === 0) {
40960
+ response.send(JSON.stringify({ error: 'No cameras configured. Select tracked cameras first.' }), {
40961
+ code: 400,
40962
+ headers: { 'Content-Type': 'application/json' },
40963
+ });
40964
+ return;
40965
+ }
40966
+ topology.cameras = cameras;
40967
+ this.storage.setItem('topology', JSON.stringify(topology));
40968
+ }
40969
+ await this.startTrackingEngine(topology);
40871
40970
  }
40872
40971
  try {
40873
40972
  let config;
@@ -41576,6 +41675,25 @@ Access the visual topology editor at \`/ui/editor\` to configure camera relation
41576
41675
  const topologyJson = this.storage.getItem('topology');
41577
41676
  return topologyJson ? JSON.parse(topologyJson) : null;
41578
41677
  }
41678
+ buildTopologyCamerasFromSettings() {
41679
+ const value = this.storageSettings.values.trackedCameras;
41680
+ const cameraIds = Array.isArray(value)
41681
+ ? value.filter(Boolean)
41682
+ : typeof value === 'string' && value.length
41683
+ ? [value]
41684
+ : [];
41685
+ return cameraIds.map((deviceId) => {
41686
+ const device = systemManager.getDeviceById(deviceId);
41687
+ return {
41688
+ deviceId,
41689
+ nativeId: device?.nativeId || deviceId,
41690
+ name: device?.name || deviceId,
41691
+ isEntryPoint: false,
41692
+ isExitPoint: false,
41693
+ trackClasses: [],
41694
+ };
41695
+ });
41696
+ }
41579
41697
  }
41580
41698
  exports.SpatialAwarenessPlugin = SpatialAwarenessPlugin;
41581
41699
  exports["default"] = SpatialAwarenessPlugin;